bitkeeper revision 1.1041.6.6 (40e96d3bioFNWNS55cowRl9PXLQZ9Q)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Mon, 5 Jul 2004 15:01:15 +0000 (15:01 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Mon, 5 Jul 2004 15:01:15 +0000 (15:01 +0000)
More x86-64 stuff.

29 files changed:
.rootkeys
xen/arch/x86/Makefile
xen/arch/x86/Rules.mk
xen/arch/x86/apic.c
xen/arch/x86/domain.c [new file with mode: 0644]
xen/arch/x86/domain_page.c [deleted file]
xen/arch/x86/entry.S [deleted file]
xen/arch/x86/irq.c
xen/arch/x86/mm.c [deleted file]
xen/arch/x86/mpparse.c
xen/arch/x86/process.c [deleted file]
xen/arch/x86/rwlock.c
xen/arch/x86/setup.c
xen/arch/x86/smpboot.c
xen/arch/x86/time.c
xen/arch/x86/trampoline.S
xen/arch/x86/traps.c
xen/arch/x86/usercopy.c [deleted file]
xen/arch/x86/x86_32/domain_page.c [new file with mode: 0644]
xen/arch/x86/x86_32/entry.S [new file with mode: 0644]
xen/arch/x86/x86_32/mm.c [new file with mode: 0644]
xen/arch/x86/x86_32/usercopy.c [new file with mode: 0644]
xen/arch/x86/x86_32/xen.lds [new file with mode: 0644]
xen/arch/x86/x86_64/entry.S [new file with mode: 0644]
xen/arch/x86/x86_64/usercopy.c [new file with mode: 0644]
xen/arch/x86/x86_64/xen.lds [new file with mode: 0644]
xen/arch/x86/xen.lds [deleted file]
xen/include/asm-x86/config.h
xen/include/asm-x86/irq.h

index c575ef31d3f8a60213a49083b06d49677d8e2006..3adfa562aaeadb44dc6b0d6fda07475394a0ac42 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/x86/boot/x86_32.S
 40e42bdbNu4MjI750THP_8J1S-Sa0g xen/arch/x86/boot/x86_64.S
 3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/x86/delay.c
-3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/domain_page.c
-3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/entry.S
+3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/domain.c
 3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/x86/extable.c
 3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/x86/flushtlb.c
 3ddb79bcesE5E-lS4QhRhlqXxqj9cA xen/arch/x86/i387.c
 3ddb79bcBit4xJXbwtX0kb1hh2uO1Q xen/arch/x86/idle0_task.c
 3ddb79bcKIkRR0kqWaJhe5VUDkMdxg xen/arch/x86/io_apic.c
 3ddb79bdqfIcjkz_h9Hvtp8Tk_19Zw xen/arch/x86/irq.c
-3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/mm.c
 3ddb79bdS4UeWWXDH-FaBKqcpMFcnw xen/arch/x86/mpparse.c
 3f12cff65EV3qOG2j37Qm0ShgvXGRw xen/arch/x86/nmi.c
 3ddb79bdHe6_Uij4-glW91vInNtBYQ xen/arch/x86/pci-irq.c
 3ddb79bdIKgipvGoqExEQ7jawfVowA xen/arch/x86/pci-x86.h
 40a4dfced2dnSzbKgJFlD3chKHexjQ xen/arch/x86/pdb-linux.c
 4022a73czgX7d-2zfF_cb33oVemApQ xen/arch/x86/pdb-stub.c
-3ddb79bc1_2bAt67x9MFCP4AZrQnvQ xen/arch/x86/process.c
 3ddb79bc7KxGCEJsgBnkDX7XjD_ZEQ xen/arch/x86/rwlock.c
 3ddb79bcrD6Z_rUvSDgrvjyb4846Eg xen/arch/x86/setup.c
 3ddb79bcSx2e8JSR3pdSGa8x1ScYzA xen/arch/x86/smp.c
 3ddb79bc-Udq7ol-NX4q9XsYnN7A2Q xen/arch/x86/time.c
 3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S
 3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c
-3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/usercopy.c
-3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/xen.lds
+3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c
+3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S
+3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c
+3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c
+3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds
+40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S
+40e96d3ahBTZqbTViInnq0lM03vs7A xen/arch/x86/x86_64/usercopy.c
+40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/x86/x86_64/xen.lds
 3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
 3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
 4022a73c_BbDFd2YJ_NQYVvKX5Oz7w xen/common/debug-linux.c
index f6ef36afcefa58a6df2f529a8580c15d16bc0c1b..039df504ef9b51b13afb9e118407453c10a84066 100644 (file)
@@ -6,6 +6,9 @@ OBJS := $(subst pdb-linux.o,,$(OBJS))
 OBJS := $(subst pdb-stub.o,,$(OBJS))
 endif
 
+OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S))
+OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c))
+
 LINK_BASE := 0xFC500000 # Xen is linked here
 LOAD_BASE := 0x00100000 # Xen is loaded here
 
@@ -23,3 +26,5 @@ default: boot/$(TARGET_SUBARCH).o $(OBJS)
 
 clean:
        rm -f *.o *~ core boot/*.o boot/*~ boot/core
+       rm -f $(TARGET_SUBARCH)/*.o $(TARGET_SUBARCH)/*~
+       rm -f $(TARGET_SUBARCH)/core
index 5963e99d8039152b998663b19bf6b99acd89ad3b..560b381845afdd6587db6bc36915032c44b5a2af 100644 (file)
@@ -8,7 +8,7 @@ CFLAGS  := -nostdinc -fno-builtin -fno-common -fno-strict-aliasing -O3
 CFLAGS  += -iwithprefix include -Wall -Werror -fomit-frame-pointer
 CFLAGS  += -I$(BASEDIR)/include -Wno-pointer-arith -Wredundant-decls
 
-LDFLAGS := -T xen.lds -N 
+LDFLAGS := -T $(TARGET_SUBARCH)/xen.lds -N 
 
 ifeq ($(TARGET_SUBARCH),x86_32)
 CFLAGS += -m32 -march=i686
index cd1733c17375de6239e76515837d79502988c15a..64974f670df09e1b69b231ecf0688af49eeead78 100644 (file)
@@ -491,9 +491,9 @@ void __init init_apic_mappings(void)
  *****************************************************************************/
 
 /* used for system time scaling */
-static unsigned int bus_freq;
-static u32          bus_cycle;   /* length of one bus cycle in pico-seconds */
-static u32          bus_scale;   /* scaling factor convert ns to bus cycles */
+static unsigned long bus_freq;    /* KAF: pointer-size avoids compile warns. */
+static u32           bus_cycle;   /* length of one bus cycle in pico-seconds */
+static u32           bus_scale;   /* scaling factor convert ns to bus cycles */
 
 /*
  * The timer chip is already set up at HZ interrupts per second here,
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
new file mode 100644 (file)
index 0000000..459a85b
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/smp.h>
+#include <xen/delay.h>
+#include <xen/softirq.h>
+#include <asm/ptrace.h>
+#include <asm/mc146818rtc.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/mpspec.h>
+#include <asm/ldt.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/shadow.h>
+
+int hlt_counter;
+
+void disable_hlt(void)
+{
+    hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+    hlt_counter--;
+}
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+static void default_idle(void)
+{
+    if ( hlt_counter == 0 )
+    {
+        __cli();
+        if ( !softirq_pending(smp_processor_id()) )
+            safe_halt();
+        else
+            __sti();
+    }
+}
+
+void continue_cpu_idle_loop(void)
+{
+    int cpu = smp_processor_id();
+    for ( ; ; )
+    {
+        irq_stat[cpu].idle_timestamp = jiffies;
+        while ( !softirq_pending(cpu) )
+            default_idle();
+        do_softirq();
+    }
+}
+
+void startup_cpu_idle_loop(void)
+{
+    /* Just some sanity to ensure that the scheduler is set up okay. */
+    ASSERT(current->domain == IDLE_DOMAIN_ID);
+    domain_unpause_by_systemcontroller(current);
+    __enter_scheduler();
+
+    /*
+     * Declares CPU setup done to the boot processor.
+     * Therefore memory barrier to ensure state is visible.
+     */
+    smp_mb();
+    init_idle();
+
+    continue_cpu_idle_loop();
+}
+
+static long no_idt[2];
+static int reboot_mode;
+int reboot_thru_bios = 0;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c)    ((c) >= '0' && (c) <= '9')
+#endif
+
+
+static inline void kb_wait(void)
+{
+    int i;
+
+    for (i=0; i<0x10000; i++)
+        if ((inb_p(0x64) & 0x02) == 0)
+            break;
+}
+
+
+void machine_restart(char * __unused)
+{
+    extern int opt_noreboot;
+#ifdef CONFIG_SMP
+    int cpuid;
+#endif
+       
+    if ( opt_noreboot )
+    {
+        printk("Reboot disabled on cmdline: require manual reset\n");
+        for ( ; ; ) __asm__ __volatile__ ("hlt");
+    }
+
+#ifdef CONFIG_SMP
+    cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+    /* KAF: Need interrupts enabled for safe IPI. */
+    __sti();
+
+    if (reboot_smp) {
+
+        /* check to see if reboot_cpu is valid 
+           if its not, default to the BSP */
+        if ((reboot_cpu == -1) ||  
+            (reboot_cpu > (NR_CPUS -1))  || 
+            !(phys_cpu_present_map & (1<<cpuid))) 
+            reboot_cpu = boot_cpu_physical_apicid;
+
+        reboot_smp = 0;  /* use this as a flag to only go through this once */
+        /* re-run this function on the other CPUs
+           it will fall though this section since we have 
+           cleared reboot_smp, and do the reboot if it is the
+           correct CPU, otherwise it halts. */
+        if (reboot_cpu != cpuid)
+            smp_call_function((void *)machine_restart , NULL, 1, 0);
+    }
+
+    /* if reboot_cpu is still -1, then we want a traditional reboot,
+       and if we are not running on the reboot_cpu, halt */
+    if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+        for (;;)
+            __asm__ __volatile__ ("hlt");
+    }
+    /*
+     * Stop all CPUs and turn off local APICs and the IO-APIC, so
+     * other OSs see a clean IRQ state.
+     */
+    smp_send_stop();
+    disable_IO_APIC();
+#endif
+
+    if(!reboot_thru_bios) {
+        /* rebooting needs to touch the page at absolute addr 0 */
+        *((unsigned short *)__va(0x472)) = reboot_mode;
+        for (;;) {
+            int i;
+            for (i=0; i<100; i++) {
+                kb_wait();
+                udelay(50);
+                outb(0xfe,0x64);         /* pulse reset low */
+                udelay(50);
+            }
+            /* That didn't work - force a triple fault.. */
+            __asm__ __volatile__("lidt %0": "=m" (no_idt));
+            __asm__ __volatile__("int3");
+        }
+    }
+
+    panic("Need to reinclude BIOS reboot code\n");
+}
+
+void machine_halt(void)
+{
+    machine_restart(0);
+}
+
+void machine_power_off(void)
+{
+    machine_restart(0);
+}
+
+#if defined(__i386__)
+
+void new_thread(struct domain *p,
+                unsigned long start_pc,
+                unsigned long start_stack,
+                unsigned long start_info)
+{
+    execution_context_t *ec = &p->shared_info->execution_context;
+
+    /*
+     * Initial register values:
+     *  DS,ES,FS,GS = FLAT_RING1_DS
+     *       CS:EIP = FLAT_RING1_CS:start_pc
+     *       SS:ESP = FLAT_RING1_DS:start_stack
+     *          ESI = start_info
+     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
+     */
+    ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS;
+    ec->cs = FLAT_RING1_CS;
+    ec->eip = start_pc;
+    ec->esp = start_stack;
+    ec->esi = start_info;
+
+    __save_flags(ec->eflags);
+    ec->eflags |= X86_EFLAGS_IF;
+
+    /* No fast trap at start of day. */
+    SET_DEFAULT_FAST_TRAP(&p->thread);
+}
+
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+               __asm__("movl %0,%%db" #register  \
+                       : /* no output */ \
+                       :"r" (thread->debugreg[register]))
+
+
+void switch_to(struct domain *prev_p, struct domain *next_p)
+{
+    struct thread_struct *next = &next_p->thread;
+    struct tss_struct *tss = init_tss + smp_processor_id();
+    execution_context_t *stack_ec = get_execution_context();
+    int i;
+    
+    __cli();
+
+    /* Switch guest general-register state. */
+    if ( !is_idle_task(prev_p) )
+    {
+        memcpy(&prev_p->shared_info->execution_context, 
+               stack_ec, 
+               sizeof(*stack_ec));
+        unlazy_fpu(prev_p);
+        CLEAR_FAST_TRAP(&prev_p->thread);
+    }
+
+    if ( !is_idle_task(next_p) )
+    {
+        memcpy(stack_ec,
+               &next_p->shared_info->execution_context,
+               sizeof(*stack_ec));
+
+        /*
+         * This is sufficient! If the descriptor DPL differs from CS RPL then 
+         * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared 
+         * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP.
+         */
+        if ( (stack_ec->cs & 3) == 0 )
+            stack_ec->cs = FLAT_RING1_CS;
+        if ( (stack_ec->ss & 3) == 0 )
+            stack_ec->ss = FLAT_RING1_DS;
+
+        SET_FAST_TRAP(&next_p->thread);
+
+        /* Switch the guest OS ring-1 stack. */
+        tss->esp1 = next->guestos_sp;
+        tss->ss1  = next->guestos_ss;
+
+        /* Maybe switch the debug registers. */
+        if ( unlikely(next->debugreg[7]) )
+        {
+            loaddebug(next, 0);
+            loaddebug(next, 1);
+            loaddebug(next, 2);
+            loaddebug(next, 3);
+            /* no 4 and 5 */
+            loaddebug(next, 6);
+            loaddebug(next, 7);
+        }
+
+        /* Switch page tables. */
+        write_ptbase(&next_p->mm);
+        tlb_clocktick();
+    }
+
+    if ( unlikely(prev_p->io_bitmap != NULL) || 
+         unlikely(next_p->io_bitmap != NULL) )
+    {
+        if ( next_p->io_bitmap != NULL )
+        {
+            /* Copy in the appropriate parts of the IO bitmap.  We use the
+             * selector to copy only the interesting parts of the bitmap. */
+
+            u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */
+
+            if ( prev_p->io_bitmap != NULL)
+            {
+                old_sel = prev_p->io_bitmap_sel;
+
+                /* Replace any areas of the IO bitmap that had bits cleared. */
+                for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+                    if ( !test_bit(i, &prev_p->io_bitmap_sel) )
+                        memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+                               &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+                               IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+            }
+
+            /* Copy in any regions of the new task's bitmap that have bits
+             * clear and we haven't already dealt with. */
+            for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+            {
+                if ( test_bit(i, &old_sel)
+                     && !test_bit(i, &next_p->io_bitmap_sel) )
+                    memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+                           &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+                           IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+            }
+
+            tss->bitmap = IO_BITMAP_OFFSET;
+
+       }
+        else
+        {
+            /* In this case, we're switching FROM a task with IO port access,
+             * to a task that doesn't use the IO bitmap.  We set any TSS bits
+             * that might have been cleared, ready for future use. */
+            for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
+                if ( !test_bit(i, &prev_p->io_bitmap_sel) )
+                    memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
+                           0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
+
+            /*
+             * a bitmap offset pointing outside of the TSS limit
+             * causes a nicely controllable SIGSEGV if a process
+             * tries to use a port IO instruction. The first
+             * sys_ioperm() call sets up the bitmap properly.
+             */
+            tss->bitmap = INVALID_IO_BITMAP_OFFSET;
+       }
+    }
+
+    set_current(next_p);
+
+    /* Switch GDT and LDT. */
+    __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
+    load_LDT(next_p);
+
+    __sti();
+}
+
+
+/* XXX Currently the 'domain' field is ignored! XXX */
+long do_iopl(domid_t domain, unsigned int new_io_pl)
+{
+    execution_context_t *ec = get_execution_context();
+    ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
+    return 0;
+}
+
+#endif
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c
deleted file mode 100644 (file)
index 23b29a0..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/******************************************************************************
- * domain_page.h
- * 
- * Allow temporary mapping of domain pages. Based on ideas from the
- * Linux PKMAP code -- the copyrights and credits are retained below.
- */
-
-/*
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- */
-
-#include <xen/config.h>
-#include <xen/sched.h>
-#include <xen/mm.h>
-#include <xen/perfc.h>
-#include <asm/domain_page.h>
-#include <asm/flushtlb.h>
-
-unsigned long *mapcache;
-static unsigned int map_idx, shadow_map_idx[NR_CPUS];
-static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
-
-/* Use a spare PTE bit to mark entries ready for recycling. */
-#define READY_FOR_TLB_FLUSH (1<<10)
-
-static void flush_all_ready_maps(void)
-{
-    unsigned long *cache = mapcache;
-
-    /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
-    do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
-    while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
-
-    perfc_incrc(domain_page_tlb_flush);
-    local_flush_tlb();
-}
-
-
-void *map_domain_mem(unsigned long pa)
-{
-    unsigned long va;
-    unsigned int idx, cpu = smp_processor_id();
-    unsigned long *cache = mapcache;
-    unsigned long flags;
-
-    perfc_incrc(map_domain_mem_count);
-
-    spin_lock_irqsave(&map_lock, flags);
-
-    /* Has some other CPU caused a wrap? We must flush if so. */
-    if ( map_idx < shadow_map_idx[cpu] )
-    {
-        perfc_incrc(domain_page_tlb_flush);
-        local_flush_tlb();
-    }
-
-    for ( ; ; )
-    {
-        idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
-        if ( idx == 0 ) flush_all_ready_maps();
-        if ( cache[idx] == 0 ) break;
-    }
-
-    cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
-
-    spin_unlock_irqrestore(&map_lock, flags);
-
-    shadow_map_idx[cpu] = idx;
-
-    va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
-    return (void *)va;
-}
-
-void unmap_domain_mem(void *va)
-{
-    unsigned int idx;
-    idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
-    mapcache[idx] |= READY_FOR_TLB_FLUSH;
-}
diff --git a/xen/arch/x86/entry.S b/xen/arch/x86/entry.S
deleted file mode 100644 (file)
index f3b6885..0000000
+++ /dev/null
@@ -1,696 +0,0 @@
-/*
- * Hypercall and fault low-level handling routines.
- *
- * Copyright (c) 2002-2004, K A Fraser
- * Copyright (c) 1991, 1992 Linus Torvalds
- */
-
-/*
- * The idea for callbacks to guest OSes
- * ====================================
- *
- * First, we require that all callbacks (either via a supplied
- * interrupt-descriptor-table, or via the special event or failsafe callbacks
- * in the shared-info-structure) are to ring 1. This just makes life easier,
- * in that it means we don't have to do messy GDT/LDT lookups to find
- * out which the privilege-level of the return code-selector. That code
- * would just be a hassle to write, and would need to account for running
- * off the end of the GDT/LDT, for example. For all callbacks we check
- * that the provided
- * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as
- * don't allow a guest OS to install ring-0 privileges into the GDT/LDT.
- * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
- * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
- * than the correct ring) and bad things are bound to ensue -- IRET is
- * likely to fault, and we may end up killing the domain (no harm can
- * come to Xen, though).
- *      
- * When doing a callback, we check if the return CS is in ring 0. If so,
- * callback is delayed until next return to ring != 0.
- * If return CS is in ring 1, then we create a callback frame
- * starting at return SS/ESP. The base of the frame does an intra-privilege
- * interrupt-return.
- * If return CS is in ring > 1, we create a callback frame starting
- * at SS/ESP taken from appropriate section of the current TSS. The base
- * of the frame does an inter-privilege interrupt-return.
- * 
- * Note that the "failsafe callback" uses a special stackframe:
- * { return_DS, return_ES, return_FS, return_GS, return_EIP,
- *   return_CS, return_EFLAGS[, return_ESP, return_SS] }
- * That is, original values for DS/ES/FS/GS are placed on stack rather than
- * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them
- * saved/restored in guest OS. Furthermore, if we load them we may cause
- * a fault if they are invalid, which is a hassle to deal with. We avoid
- * that problem if we don't load them :-) This property allows us to use
- * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS
- * on return to ring != 0, we can simply package it up as a return via
- * the failsafe callback, and let the guest OS sort it out (perhaps by
- * killing an application process). Note that we also do this for any
- * faulting IRET -- just let the guest OS handle it via the event
- * callback.
- *
- * We terminate a domain in the following cases:
- *  - creating a callback stack frame (due to bad ring-1 stack).
- *  - faulting IRET on entry to failsafe callback handler.
- * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
- * handler in good order (absolutely no faults allowed!).
- */
-
-#include <xen/config.h>
-#include <xen/errno.h>
-#include <hypervisor-ifs/hypervisor-if.h>
-
-EBX            = 0x00
-ECX            = 0x04
-EDX            = 0x08
-ESI            = 0x0C
-EDI            = 0x10
-EBP            = 0x14
-EAX            = 0x18
-DS             = 0x1C
-ES             = 0x20
-FS              = 0x24
-GS              = 0x28
-ORIG_EAX       = 0x2C
-EIP            = 0x30
-CS             = 0x34
-EFLAGS         = 0x38
-OLDESP         = 0x3C
-OLDSS          = 0x40
-
-/* Offsets in domain structure */
-PROCESSOR       =  0
-SHARED_INFO     =  4
-EVENT_SEL       =  8
-EVENT_ADDR      = 12
-FAILSAFE_BUFFER = 16
-FAILSAFE_SEL    = 32
-FAILSAFE_ADDR   = 36
-
-/* Offsets in shared_info_t */
-#define UPCALL_PENDING /* 0 */
-#define UPCALL_MASK       1
-
-/* Offsets in guest_trap_bounce */
-GTB_ERROR_CODE   =  0
-GTB_CR2          =  4
-GTB_FLAGS        =  8
-GTB_CS           = 10
-GTB_EIP          = 12
-GTBF_TRAP        =  1
-GTBF_TRAP_NOCODE =  2
-GTBF_TRAP_CR2    =  4
-                        
-CF_MASK                = 0x00000001
-IF_MASK                = 0x00000200
-NT_MASK                = 0x00004000
-        
-#define SAVE_ALL_NOSEGREGS \
-        cld; \
-        pushl %gs; \
-        pushl %fs; \
-        pushl %es; \
-        pushl %ds; \
-        pushl %eax; \
-        pushl %ebp; \
-        pushl %edi; \
-        pushl %esi; \
-        pushl %edx; \
-        pushl %ecx; \
-        pushl %ebx; \
-
-#define SAVE_ALL \
-        SAVE_ALL_NOSEGREGS \
-        movl $(__HYPERVISOR_DS),%edx; \
-        movl %edx,%ds; \
-        movl %edx,%es; \
-        movl %edx,%fs; \
-        movl %edx,%gs; \
-        sti;
-
-#define GET_CURRENT(reg)   \
-        movl $4096-4, reg; \
-        orl  %esp, reg;    \
-        andl $~3,reg;      \
-        movl (reg),reg;
-
-ENTRY(continue_nonidle_task)
-        GET_CURRENT(%ebx)
-        jmp test_all_events
-
-        ALIGN
-/*
- * HYPERVISOR_multicall(call_list, nr_calls)
- *   Execute a list of 'nr_calls' hypercalls, pointed at by 'call_list'.
- *   This is fairly easy except that:
- *   1. We may fault reading the call list, and must patch that up; and
- *   2. We cannot recursively call HYPERVISOR_multicall, or a malicious
- *      caller could cause our stack to blow up.
- */
-do_multicall:
-        popl  %eax
-        cmpl  $SYMBOL_NAME(multicall_return_from_call),%eax
-        je    multicall_return_from_call
-        pushl %ebx
-        movl  4(%esp),%ebx   /* EBX == call_list */
-        movl  8(%esp),%ecx   /* ECX == nr_calls  */
-multicall_loop:
-        pushl %ecx
-multicall_fault1: 
-        pushl 20(%ebx)      # args[4]
-multicall_fault2: 
-        pushl 16(%ebx)      # args[3]
-multicall_fault3: 
-        pushl 12(%ebx)      # args[2]
-multicall_fault4: 
-        pushl 8(%ebx)       # args[1]
-multicall_fault5: 
-        pushl 4(%ebx)       # args[0]
-multicall_fault6: 
-        movl  (%ebx),%eax   # op
-        andl  $(NR_hypercalls-1),%eax
-        call  *SYMBOL_NAME(hypercall_table)(,%eax,4)
-multicall_return_from_call:
-multicall_fault7:
-        movl  %eax,24(%ebx) # args[5] == result
-        addl  $20,%esp
-        popl  %ecx
-        addl  $(ARGS_PER_MULTICALL_ENTRY*4),%ebx
-        loop  multicall_loop
-        popl  %ebx
-        xorl  %eax,%eax
-        jmp   ret_from_hypercall
-
-.section __ex_table,"a"
-        .align 4
-        .long multicall_fault1, multicall_fixup1
-        .long multicall_fault2, multicall_fixup2
-        .long multicall_fault3, multicall_fixup3
-        .long multicall_fault4, multicall_fixup4
-        .long multicall_fault5, multicall_fixup5
-        .long multicall_fault6, multicall_fixup6
-.previous
-               
-.section .fixup,"ax"
-multicall_fixup6: 
-        addl  $4,%esp
-multicall_fixup5: 
-        addl  $4,%esp
-multicall_fixup4: 
-        addl  $4,%esp
-multicall_fixup3: 
-        addl  $4,%esp
-multicall_fixup2: 
-        addl  $4,%esp
-multicall_fixup1:
-        addl  $4,%esp
-        popl  %ebx
-        movl  $-EFAULT,%eax
-        jmp   ret_from_hypercall
-.previous        
-                
-        ALIGN
-restore_all_guest:
-        # First, may need to restore %ds if clobbered by create_bounce_frame
-        pushl %ss
-        popl  %ds
-        # Second, create a failsafe copy of DS,ES,FS,GS in case any are bad
-        leal  DS(%esp),%esi
-        leal  FAILSAFE_BUFFER(%ebx),%edi
-        movsl
-        movsl
-        movsl
-        movsl
-        # Finally, restore guest registers -- faults will cause failsafe
-        popl %ebx
-       popl %ecx
-       popl %edx
-       popl %esi
-       popl %edi
-       popl %ebp
-       popl %eax
-1:     popl %ds
-2:     popl %es
-3:     popl %fs
-4:     popl %gs
-        addl $4,%esp
-5:      iret
-.section .fixup,"ax"
-10:     subl $4,%esp
-        pushl %gs
-9:      pushl %fs
-8:      pushl %es
-7:      pushl %ds
-6:      pushl %eax
-       pushl %ebp
-       pushl %edi
-       pushl %esi
-       pushl %edx
-       pushl %ecx
-       pushl %ebx
-       pushl %ss
-       popl  %ds
-       pushl %ss
-       popl  %es
-       jmp  failsafe_callback
-.previous
-.section __ex_table,"a"
-       .align 4
-       .long 1b,6b
-       .long 2b,7b
-       .long 3b,8b
-       .long 4b,9b
-       .long 5b,10b
-.previous
-
-/* No special register assumptions */
-failsafe_callback:
-        GET_CURRENT(%ebx)
-        movl PROCESSOR(%ebx),%eax
-        shl  $4,%eax
-        lea  guest_trap_bounce(%eax),%edx
-        movl FAILSAFE_ADDR(%ebx),%eax
-        movl %eax,GTB_EIP(%edx)
-        movl FAILSAFE_SEL(%ebx),%eax
-        movw %ax,GTB_CS(%edx)
-        call create_bounce_frame
-        subl $16,%esi                # add DS/ES/FS/GS to failsafe stack frame
-        leal FAILSAFE_BUFFER(%ebx),%ebp
-        movl  0(%ebp),%eax           # DS
-FAULT1: movl %eax,(%esi) 
-        movl  4(%ebp),%eax           # ES
-FAULT2: movl %eax,4(%esi)
-        movl  8(%ebp),%eax           # FS
-FAULT3: movl %eax,8(%esi) 
-        movl 12(%ebp),%eax           # GS
-FAULT4: movl %eax,12(%esi)
-        movl %esi,OLDESP(%esp)
-        popl %ebx
-        popl %ecx
-        popl %edx
-        popl %esi
-        popl %edi
-        popl %ebp
-        popl %eax
-        addl $20,%esp                # skip DS/ES/FS/GS/ORIG_EAX
-FAULT5: iret 
-
-
-        ALIGN
-# Simple restore -- we should never fault as we we will only interrupt ring 0
-# when sane values have been placed in all registers. The only exception is
-# NMI, which may interrupt before good values have been placed in DS-GS.
-# The NMI return code deals with this problem itself.
-restore_all_xen:
-       popl %ebx
-       popl %ecx
-       popl %edx
-       popl %esi
-       popl %edi
-       popl %ebp
-       popl %eax
-       popl %ds
-       popl %es
-       popl %fs
-       popl %gs
-        addl $4,%esp
-        iret
-
-        ALIGN
-ENTRY(hypercall)
-        pushl %eax                     # save orig_eax
-       SAVE_ALL
-       GET_CURRENT(%ebx)
-       andl $(NR_hypercalls-1),%eax
-       call *SYMBOL_NAME(hypercall_table)(,%eax,4)
-
-ret_from_hypercall:
-        movl %eax,EAX(%esp)            # save the return value
-
-test_all_events:
-        xorl %ecx,%ecx
-        notl %ecx
-        cli                             # tests must not race interrupts
-/*test_softirqs:*/  
-        movl PROCESSOR(%ebx),%eax
-        shl  $6,%eax                    # sizeof(irq_cpustat) == 64
-        test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
-        jnz  process_softirqs
-/*test_guest_events:*/
-        movl SHARED_INFO(%ebx),%eax
-        testb $0xFF,UPCALL_MASK(%eax)
-        jnz  restore_all_guest
-        testb $0xFF,UPCALL_PENDING(%eax)
-        jz   restore_all_guest
-        movb $1,UPCALL_MASK(%eax)       # Upcalls are masked during delivery
-/*process_guest_events:*/
-        movl PROCESSOR(%ebx),%edx
-        shl  $4,%edx                    # sizeof(guest_trap_bounce) == 16
-        lea  guest_trap_bounce(%edx),%edx
-        movl EVENT_ADDR(%ebx),%eax
-        movl %eax,GTB_EIP(%edx)
-        movl EVENT_SEL(%ebx),%eax
-        movw %ax,GTB_CS(%edx)
-        call create_bounce_frame
-        jmp  restore_all_guest
-
-        ALIGN
-process_softirqs:
-        sti       
-        call SYMBOL_NAME(do_softirq)
-        jmp  test_all_events
-                
-/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:         */
-/*   {EIP, CS, EFLAGS, [ESP, SS]}                                     */
-/* %edx == guest_trap_bounce, %ebx == task_struct                     */
-/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP.        */
-create_bounce_frame:        
-        mov  CS+4(%esp),%cl
-        test $2,%cl
-        jz   1f /* jump if returning to an existing ring-1 activation */
-        /* obtain ss/esp from TSS -- no current ring-1 activations */
-        movl PROCESSOR(%ebx),%eax
-        /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */
-        movl %eax, %ecx
-        shll $7, %ecx
-        shll $13, %eax
-        addl %ecx,%eax
-        addl $init_tss + 12,%eax
-        movl (%eax),%esi /* tss->esp1 */
-FAULT6: movl 4(%eax),%ds /* tss->ss1  */
-        /* base of stack frame must contain ss/esp (inter-priv iret) */
-        subl $8,%esi
-        movl OLDESP+4(%esp),%eax
-FAULT7: movl %eax,(%esi) 
-        movl OLDSS+4(%esp),%eax
-FAULT8: movl %eax,4(%esi) 
-        jmp 2f
-1:      /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
-        movl OLDESP+4(%esp),%esi
-FAULT9: movl OLDSS+4(%esp),%ds 
-2:      /* Construct a stack frame: EFLAGS, CS/EIP */
-        subl $12,%esi
-        movl EIP+4(%esp),%eax
-FAULT10:movl %eax,(%esi) 
-        movl CS+4(%esp),%eax
-FAULT11:movl %eax,4(%esi) 
-        movl EFLAGS+4(%esp),%eax
-FAULT12:movl %eax,8(%esi)
-        /* Rewrite our stack frame and return to ring 1. */
-        /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
-        andl $0xfffcbeff,%eax
-        movl %eax,EFLAGS+4(%esp)
-        movl %ds,OLDSS+4(%esp)
-        movl %esi,OLDESP+4(%esp)
-        movzwl %es:GTB_CS(%edx),%eax
-        movl %eax,CS+4(%esp)
-        movl %es:GTB_EIP(%edx),%eax
-        movl %eax,EIP+4(%esp)
-        ret
-        
-                              
-.section __ex_table,"a"
-        .align 4
-        .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack
-        .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack
-        .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack
-        .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack
-        .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret
-        .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector
-        .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack
-        .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack
-        .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector
-        .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack
-        .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack
-        .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack
-        .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack
-        .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack
-.previous
-               
-# This handler kills domains which experience unrecoverable faults.
-.section .fixup,"ax"
-crash_domain_fixup1:
-        subl  $4,%esp
-        SAVE_ALL
-        jmp   domain_crash
-crash_domain_fixup2:
-        addl  $4,%esp                     
-crash_domain_fixup3:
-        pushl %ss
-        popl  %ds
-        jmp   domain_crash
-.previous
-
-        ALIGN
-process_guest_exception_and_events:        
-        movl PROCESSOR(%ebx),%eax
-        shl  $4,%eax
-        lea  guest_trap_bounce(%eax),%edx
-        testb $~0,GTB_FLAGS(%edx)
-        jz   test_all_events
-        call create_bounce_frame        # just the basic frame
-        mov  %es:GTB_FLAGS(%edx),%cl
-        test $GTBF_TRAP_NOCODE,%cl
-        jnz  2f
-        subl $4,%esi                    # push error_code onto guest frame
-        movl %es:GTB_ERROR_CODE(%edx),%eax
-FAULT13:movl %eax,(%esi)
-        test $GTBF_TRAP_CR2,%cl
-        jz   1f
-        subl $4,%esi                    # push %cr2 onto guest frame
-        movl %es:GTB_CR2(%edx),%eax
-FAULT14:movl %eax,(%esi)
-1:      movl %esi,OLDESP(%esp)        
-2:      push %es                        # unclobber %ds
-        pop  %ds 
-        movb $0,GTB_FLAGS(%edx)
-        jmp  test_all_events
-
-        ALIGN
-ENTRY(ret_from_intr)
-       GET_CURRENT(%ebx)
-        movb CS(%esp),%al
-       testb $3,%al    # return to non-supervisor?
-       jne test_all_events
-       jmp restore_all_xen
-
-ENTRY(divide_error)
-       pushl $0                # no error code
-       pushl $ SYMBOL_NAME(do_divide_error)
-       ALIGN
-error_code:
-       pushl %fs
-       pushl %es
-       pushl %ds
-       pushl %eax
-       xorl  %eax,%eax
-       pushl %ebp
-       pushl %edi
-       pushl %esi
-       pushl %edx
-       decl  %eax                      # eax = -1
-       pushl %ecx
-       pushl %ebx
-       cld
-       movl  %gs,%ecx
-       movl  ORIG_EAX(%esp), %esi      # get the error code
-       movl  GS(%esp), %edi            # get the function address
-       movl  %eax, ORIG_EAX(%esp)
-       movl  %ecx, GS(%esp)
-       movl  $(__HYPERVISOR_DS),%edx
-       movl  %edx,%ds
-       movl  %edx,%es
-       movl  %edx,%fs
-       movl  %edx,%gs
-       movl  %esp,%edx
-       pushl %esi                      # push the error code
-       pushl %edx                      # push the pt_regs pointer
-       GET_CURRENT(%ebx)
-       call  *%edi
-        addl  $8,%esp
-        movb  CS(%esp),%al
-       testb $3,%al
-       je    restore_all_xen
-        jmp   process_guest_exception_and_events
-
-ENTRY(coprocessor_error)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_coprocessor_error)
-       jmp error_code
-
-ENTRY(simd_coprocessor_error)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
-       jmp error_code
-
-ENTRY(device_not_available)
-       pushl $0
-        pushl $SYMBOL_NAME(math_state_restore)
-        jmp   error_code
-
-ENTRY(debug)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_debug)
-       jmp error_code
-
-ENTRY(int3)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_int3)
-       jmp error_code
-
-ENTRY(overflow)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_overflow)
-       jmp error_code
-
-ENTRY(bounds)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_bounds)
-       jmp error_code
-
-ENTRY(invalid_op)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_invalid_op)
-       jmp error_code
-
-ENTRY(coprocessor_segment_overrun)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
-       jmp error_code
-
-ENTRY(invalid_TSS)
-       pushl $ SYMBOL_NAME(do_invalid_TSS)
-       jmp error_code
-
-ENTRY(segment_not_present)
-       pushl $ SYMBOL_NAME(do_segment_not_present)
-       jmp error_code
-
-ENTRY(stack_segment)
-       pushl $ SYMBOL_NAME(do_stack_segment)
-       jmp error_code
-
-ENTRY(general_protection)
-       pushl $ SYMBOL_NAME(do_general_protection)
-       jmp error_code
-
-ENTRY(alignment_check)
-       pushl $ SYMBOL_NAME(do_alignment_check)
-       jmp error_code
-
-ENTRY(page_fault)
-       pushl $ SYMBOL_NAME(do_page_fault)
-       jmp error_code
-
-ENTRY(machine_check)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_machine_check)
-       jmp error_code
-
-ENTRY(spurious_interrupt_bug)
-       pushl $0
-       pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
-       jmp error_code
-
-ENTRY(nmi)
-        # Save state but do not trash the segment registers!
-        # We may otherwise be unable to reload them or copy them to ring 1. 
-       pushl %eax
-       SAVE_ALL_NOSEGREGS
-
-        # Check for hardware problems. These are always fatal so we can
-        # reload DS and ES when handling them.
-        inb   $0x61,%al
-        testb $0x80,%al
-        jne   nmi_parity_err
-        testb $0x40,%al
-        jne   nmi_io_err
-        movl  %eax,%ebx
-        
-        # Okay, its almost a normal NMI tick. We can only process it if:
-        #  A. We are the outermost Xen activation (in which case we have
-        #     the selectors safely saved on our stack)
-        #  B. DS-GS all contain sane Xen values.
-        # In all other cases we bail without touching DS-GS, as we have
-        # interrupted an enclosing Xen activation in tricky prologue or
-        # epilogue code.
-        movb  CS(%esp),%al
-       testb $3,%al
-        jne   do_watchdog_tick
-        movl  DS(%esp),%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   nmi_badseg
-        movl  ES(%esp),%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   nmi_badseg
-        movl  FS(%esp),%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   nmi_badseg
-        movl  GS(%esp),%eax
-        cmpw  $(__HYPERVISOR_DS),%ax
-        jne   nmi_badseg
-
-do_watchdog_tick:
-        movl  $(__HYPERVISOR_DS),%edx
-        movl  %edx,%ds
-        movl  %edx,%es
-        movl  %esp,%edx
-       pushl %ebx   # reason
-       pushl %edx   # regs
-        call  SYMBOL_NAME(do_nmi)
-       addl  $8,%esp
-        movb  CS(%esp),%al
-       testb $3,%al
-       je    restore_all_xen
-        GET_CURRENT(%ebx)
-        jmp   restore_all_guest
-
-nmi_badseg:
-       popl %ebx
-       popl %ecx
-       popl %edx
-       popl %esi
-       popl %edi
-       popl %ebp
-       popl %eax
-        addl $20,%esp
-        iret
-
-nmi_parity_err: 
-        movl $(__HYPERVISOR_DS),%edx
-        movl %edx,%ds
-        movl %edx,%es
-        jmp  SYMBOL_NAME(mem_parity_error)
-        
-nmi_io_err: 
-        movl $(__HYPERVISOR_DS),%edx
-        movl %edx,%ds
-        movl %edx,%es
-        jmp  SYMBOL_NAME(io_check_error)                        
-        
-.data
-ENTRY(hypercall_table)
-        .long SYMBOL_NAME(do_set_trap_table)     /*  0 */
-        .long SYMBOL_NAME(do_mmu_update)
-        .long SYMBOL_NAME(do_set_gdt)
-        .long SYMBOL_NAME(do_stack_switch)
-        .long SYMBOL_NAME(do_set_callbacks)
-        .long SYMBOL_NAME(do_fpu_taskswitch)     /*  5 */
-        .long SYMBOL_NAME(do_sched_op)
-        .long SYMBOL_NAME(do_dom0_op)
-        .long SYMBOL_NAME(do_set_debugreg)
-        .long SYMBOL_NAME(do_get_debugreg)
-        .long SYMBOL_NAME(do_update_descriptor)  /* 10 */
-        .long SYMBOL_NAME(do_set_fast_trap)
-        .long SYMBOL_NAME(do_dom_mem_op)
-        .long SYMBOL_NAME(do_multicall)
-        .long SYMBOL_NAME(do_update_va_mapping)
-        .long SYMBOL_NAME(do_set_timer_op)       /* 15 */
-        .long SYMBOL_NAME(do_event_channel_op)
-        .long SYMBOL_NAME(do_xen_version)
-        .long SYMBOL_NAME(do_console_io)
-        .long SYMBOL_NAME(do_physdev_op)
-        .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 20 */
-        .rept NR_hypercalls-((.-hypercall_table)/4)
-        .long SYMBOL_NAME(do_ni_hypercall)
-        .endr
index 950138849b3ffe9598b38fe1fa845a133eb93a2f..af0b0de1fbf357a1a386d85b7957a45268673aa3 100644 (file)
@@ -89,7 +89,11 @@ void enable_irq(unsigned int irq)
 
 asmlinkage void do_IRQ(struct pt_regs regs)
 {       
+#if defined(__i386__)
     unsigned int      irq = regs.orig_eax;
+#else
+    unsigned int      irq = 0; /* XXX */
+#endif
     irq_desc_t       *desc = &irq_desc[irq];
     struct irqaction *action;
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
deleted file mode 100644 (file)
index 59e304e..0000000
+++ /dev/null
@@ -1,412 +0,0 @@
-/******************************************************************************
- * arch/i386/mm.c
- * 
- * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/init.h>
-#include <xen/mm.h>
-#include <asm/page.h>
-#include <asm/flushtlb.h>
-#include <asm/fixmap.h>
-#include <asm/domain_page.h>
-
-static inline void set_pte_phys(unsigned long vaddr,
-                                l1_pgentry_t entry)
-{
-    l2_pgentry_t *l2ent;
-    l1_pgentry_t *l1ent;
-
-    l2ent = &idle_pg_table[l2_table_offset(vaddr)];
-    l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
-    *l1ent = entry;
-
-    /* It's enough to flush this one mapping. */
-    __flush_tlb_one(vaddr);
-}
-
-
-void __set_fixmap(enum fixed_addresses idx, 
-                  l1_pgentry_t entry)
-{
-    unsigned long address = fix_to_virt(idx);
-
-    if ( likely(idx < __end_of_fixed_addresses) )
-        set_pte_phys(address, entry);
-    else
-        printk("Invalid __set_fixmap\n");
-}
-
-
-static void __init fixrange_init(unsigned long start, 
-                                 unsigned long end, 
-                                 l2_pgentry_t *pg_base)
-{
-    l2_pgentry_t *l2e;
-    int i;
-    unsigned long vaddr, page;
-
-    vaddr = start;
-    i = l2_table_offset(vaddr);
-    l2e = pg_base + i;
-
-    for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ ) 
-    {
-        if ( !l2_pgentry_empty(*l2e) )
-            continue;
-        page = (unsigned long)get_free_page();
-        clear_page(page);
-        *l2e = mk_l2_pgentry(__pa(page) | __PAGE_HYPERVISOR);
-        vaddr += 1 << L2_PAGETABLE_SHIFT;
-    }
-}
-
-void __init paging_init(void)
-{
-    unsigned long addr;
-    void *ioremap_pt;
-    int i;
-
-    /* Idle page table 1:1 maps the first part of physical memory. */
-    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
-        idle_pg_table[i] = 
-            mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) | 
-                          __PAGE_HYPERVISOR | _PAGE_PSE);
-
-    /*
-     * Fixed mappings, only the page table structure has to be
-     * created - mappings will be set by set_fixmap():
-     */
-    addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
-    fixrange_init(addr, 0, idle_pg_table);
-
-    /* Create page table for ioremap(). */
-    ioremap_pt = (void *)get_free_page();
-    clear_page(ioremap_pt);
-    idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] = 
-        mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
-
-    /* Create read-only mapping of MPT for guest-OS use. */
-    idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
-        idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT];
-    mk_l2_readonly(idle_pg_table + 
-                   (RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT));
-
-    /* Set up mapping cache for domain pages. */
-    mapcache = (unsigned long *)get_free_page();
-    clear_page(mapcache);
-    idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
-        mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
-
-    /* Set up linear page table mapping. */
-    idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
-        mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
-
-}
-
-void __init zap_low_mappings(void)
-{
-    int i;
-    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
-        idle_pg_table[i] = mk_l2_pgentry(0);
-    flush_tlb_all_pge();
-}
-
-
-long do_stack_switch(unsigned long ss, unsigned long esp)
-{
-    int nr = smp_processor_id();
-    struct tss_struct *t = &init_tss[nr];
-
-    /* We need to do this check as we load and use SS on guest's behalf. */
-    if ( (ss & 3) == 0 )
-        return -EPERM;
-
-    current->thread.guestos_ss = ss;
-    current->thread.guestos_sp = esp;
-    t->ss1  = ss;
-    t->esp1 = esp;
-
-    return 0;
-}
-
-
-/* Returns TRUE if given descriptor is valid for GDT or LDT. */
-int check_descriptor(unsigned long a, unsigned long b)
-{
-    unsigned long base, limit;
-
-    /* A not-present descriptor will always fault, so is safe. */
-    if ( !(b & _SEGMENT_P) ) 
-        goto good;
-
-    /*
-     * We don't allow a DPL of zero. There is no legitimate reason for 
-     * specifying DPL==0, and it gets rather dangerous if we also accept call 
-     * gates (consider a call gate pointing at another guestos descriptor with 
-     * DPL 0 -- this would get the OS ring-0 privileges).
-     */
-    if ( (b & _SEGMENT_DPL) == 0 )
-        goto bad;
-
-    if ( !(b & _SEGMENT_S) )
-    {
-        /*
-         * System segment:
-         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
-         *  2. Don't allow TSS descriptors or task gates as we don't
-         *     virtualise x86 tasks.
-         *  3. Don't allow LDT descriptors because they're unnecessary and
-         *     I'm uneasy about allowing an LDT page to contain LDT
-         *     descriptors. In any case, Xen automatically creates the
-         *     required descriptor when reloading the LDT register.
-         *  4. We allow call gates but they must not jump to a private segment.
-         */
-
-        /* Disallow everything but call gates. */
-        if ( (b & _SEGMENT_TYPE) != 0xc00 )
-            goto bad;
-
-        /* Can't allow far jump to a Xen-private segment. */
-        if ( !VALID_CODESEL(a>>16) )
-            goto bad;
-
-        /* Reserved bits must be zero. */
-        if ( (b & 0xe0) != 0 )
-            goto bad;
-        
-        /* No base/limit check is needed for a call gate. */
-        goto good;
-    }
-    
-    /* Check that base/limit do not overlap Xen-private space. */
-    base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
-    limit = (b&0xf0000) | (a&0xffff);
-    limit++; /* We add one because limit is inclusive. */
-    if ( (b & _SEGMENT_G) )
-        limit <<= 12;
-    if ( ((base + limit) <= base) || 
-         ((base + limit) > PAGE_OFFSET) )
-        goto bad;
-
- good:
-    return 1;
- bad:
-    return 0;
-}
-
-
-long set_gdt(struct domain *p, 
-             unsigned long *frames,
-             unsigned int entries)
-{
-    /* NB. There are 512 8-byte entries per GDT page. */
-    int i, nr_pages = (entries + 511) / 512;
-    unsigned long pfn;
-    struct desc_struct *vgdt;
-
-    /* Check the new GDT. */
-    for ( i = 0; i < nr_pages; i++ )
-    {
-        if ( unlikely(frames[i] >= max_page) ||
-             unlikely(!get_page_and_type(&frame_table[frames[i]], 
-                                         p, PGT_gdt_page)) )
-            goto fail;
-    }
-
-    /* Copy reserved GDT entries to the new GDT. */
-    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
-    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY, 
-           gdt_table + FIRST_RESERVED_GDT_ENTRY, 
-           NR_RESERVED_GDT_ENTRIES*8);
-    unmap_domain_mem(vgdt);
-
-    /* Tear down the old GDT. */
-    for ( i = 0; i < 16; i++ )
-    {
-        if ( (pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i])) != 0 )
-            put_page_and_type(&frame_table[pfn]);
-        p->mm.perdomain_pt[i] = mk_l1_pgentry(0);
-    }
-
-    /* Install the new GDT. */
-    for ( i = 0; i < nr_pages; i++ )
-        p->mm.perdomain_pt[i] =
-            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
-
-    SET_GDT_ADDRESS(p, GDT_VIRT_START);
-    SET_GDT_ENTRIES(p, (entries*8)-1);
-
-    return 0;
-
- fail:
-    while ( i-- > 0 )
-        put_page_and_type(&frame_table[frames[i]]);
-    return -EINVAL;
-}
-
-
-long do_set_gdt(unsigned long *frame_list, unsigned int entries)
-{
-    int nr_pages = (entries + 511) / 512;
-    unsigned long frames[16];
-    long ret;
-
-    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) ) 
-        return -EINVAL;
-    
-    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
-        return -EFAULT;
-
-    if ( (ret = set_gdt(current, frames, entries)) == 0 )
-    {
-        local_flush_tlb();
-        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
-    }
-
-    return ret;
-}
-
-
-long do_update_descriptor(
-    unsigned long pa, unsigned long word1, unsigned long word2)
-{
-    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
-    struct pfn_info *page;
-    long ret = -EINVAL;
-
-    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
-        return -EINVAL;
-
-    page = &frame_table[pfn];
-    if ( unlikely(!get_page(page, current)) )
-        goto out;
-
-    /* Check if the given frame is in use in an unsafe context. */
-    switch ( page->type_and_flags & PGT_type_mask )
-    {
-    case PGT_gdt_page:
-        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
-        if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
-             (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
-             (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
-            goto out;
-        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
-            goto out;
-        break;
-    case PGT_ldt_page:
-        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
-            goto out;
-        break;
-    default:
-        if ( unlikely(!get_page_type(page, PGT_writeable_page)) )
-            goto out;
-        break;
-    }
-
-    /* All is good so make the update. */
-    gdt_pent = map_domain_mem(pa);
-    gdt_pent[0] = word1;
-    gdt_pent[1] = word2;
-    unmap_domain_mem(gdt_pent);
-
-    put_page_type(page);
-
-    ret = 0; /* success */
-
- out:
-    put_page(page);
-    return ret;
-}
-
-#ifdef MEMORY_GUARD
-
-void *memguard_init(void *heap_start)
-{
-    l1_pgentry_t *l1;
-    int i, j;
-
-    /* Round the allocation pointer up to a page boundary. */
-    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) & 
-                          PAGE_MASK);
-
-    /* Memory guarding is incompatible with super pages. */
-    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
-    {
-        l1 = (l1_pgentry_t *)heap_start;
-        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
-        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
-            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
-                                   (j << L1_PAGETABLE_SHIFT) | 
-                                  __PAGE_HYPERVISOR);
-        idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
-            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
-    }
-
-    return heap_start;
-}
-
-static void __memguard_change_range(void *p, unsigned long l, int guard)
-{
-    l1_pgentry_t *l1;
-    l2_pgentry_t *l2;
-    unsigned long _p = (unsigned long)p;
-    unsigned long _l = (unsigned long)l;
-
-    /* Ensure we are dealing with a page-aligned whole number of pages. */
-    ASSERT((_p&PAGE_MASK) != 0);
-    ASSERT((_l&PAGE_MASK) != 0);
-    ASSERT((_p&~PAGE_MASK) == 0);
-    ASSERT((_l&~PAGE_MASK) == 0);
-
-    while ( _l != 0 )
-    {
-        l2  = &idle_pg_table[l2_table_offset(_p)];
-        l1  = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
-        if ( guard )
-            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
-        else
-            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
-        _p += PAGE_SIZE;
-        _l -= PAGE_SIZE;
-    }
-}
-
-void memguard_guard_range(void *p, unsigned long l)
-{
-    __memguard_change_range(p, l, 1);
-    local_flush_tlb();
-}
-
-void memguard_unguard_range(void *p, unsigned long l)
-{
-    __memguard_change_range(p, l, 0);
-}
-
-int memguard_is_guarded(void *p)
-{
-    l1_pgentry_t *l1;
-    l2_pgentry_t *l2;
-    unsigned long _p = (unsigned long)p;
-    l2  = &idle_pg_table[l2_table_offset(_p)];
-    l1  = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
-    return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
-}
-
-#endif
index ae1e1da93bf50cc0fe743a95045d28bbc867234f..d10e4ce9dd343050db3536742007a77ab154fbec 100644 (file)
@@ -458,7 +458,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
        if ((clustered_apic_mode == CLUSTERED_APIC_NUMAQ) && mpc->mpc_oemptr) {
                /* We need to process the oem mpc tables to tell us which quad things are in ... */
                mpc_record = 0;
-               smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr, mpc->mpc_oemsize);
+               smp_read_mpc_oem((struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr, mpc->mpc_oemsize);
                mpc_record = 0;
        }
 
@@ -824,7 +824,7 @@ void __init get_smp_config (void)
                 * Read the physical hardware table.  Anything here will
                 * override the defaults.
                 */
-               if (!smp_read_mpc((void *)mpf->mpf_physptr)) {
+               if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
                        smp_found_config = 0;
                        printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
                        printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
diff --git a/xen/arch/x86/process.c b/xen/arch/x86/process.c
deleted file mode 100644 (file)
index d3bf5ab..0000000
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- *  linux/arch/i386/kernel/process.c
- *
- *  Copyright (C) 1995  Linus Torvalds
- *
- *  Pentium III FXSR, SSE support
- *     Gareth Hughes <gareth@valinux.com>, May 2000
- */
-
-/*
- * This file handles the architecture-dependent parts of process handling..
- */
-
-#define __KERNEL_SYSCALLS__
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/smp.h>
-#include <xen/delay.h>
-#include <xen/softirq.h>
-#include <asm/ptrace.h>
-#include <asm/mc146818rtc.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-#include <asm/i387.h>
-#include <asm/mpspec.h>
-#include <asm/ldt.h>
-#include <xen/irq.h>
-#include <xen/event.h>
-#include <xen/shadow.h>
-
-int hlt_counter;
-
-void disable_hlt(void)
-{
-    hlt_counter++;
-}
-
-void enable_hlt(void)
-{
-    hlt_counter--;
-}
-
-/*
- * We use this if we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
-    if ( hlt_counter == 0 )
-    {
-        __cli();
-        if ( !softirq_pending(smp_processor_id()) )
-            safe_halt();
-        else
-            __sti();
-    }
-}
-
-void continue_cpu_idle_loop(void)
-{
-    int cpu = smp_processor_id();
-    for ( ; ; )
-    {
-        irq_stat[cpu].idle_timestamp = jiffies;
-        while ( !softirq_pending(cpu) )
-            default_idle();
-        do_softirq();
-    }
-}
-
-void startup_cpu_idle_loop(void)
-{
-    /* Just some sanity to ensure that the scheduler is set up okay. */
-    ASSERT(current->domain == IDLE_DOMAIN_ID);
-    domain_unpause_by_systemcontroller(current);
-    __enter_scheduler();
-
-    /*
-     * Declares CPU setup done to the boot processor.
-     * Therefore memory barrier to ensure state is visible.
-     */
-    smp_mb();
-    init_idle();
-
-    continue_cpu_idle_loop();
-}
-
-static long no_idt[2];
-static int reboot_mode;
-int reboot_thru_bios = 0;
-
-#ifdef CONFIG_SMP
-int reboot_smp = 0;
-static int reboot_cpu = -1;
-/* shamelessly grabbed from lib/vsprintf.c for readability */
-#define is_digit(c)    ((c) >= '0' && (c) <= '9')
-#endif
-
-
-static inline void kb_wait(void)
-{
-    int i;
-
-    for (i=0; i<0x10000; i++)
-        if ((inb_p(0x64) & 0x02) == 0)
-            break;
-}
-
-
-void machine_restart(char * __unused)
-{
-    extern int opt_noreboot;
-#ifdef CONFIG_SMP
-    int cpuid;
-#endif
-       
-    if ( opt_noreboot )
-    {
-        printk("Reboot disabled on cmdline: require manual reset\n");
-        for ( ; ; ) __asm__ __volatile__ ("hlt");
-    }
-
-#ifdef CONFIG_SMP
-    cpuid = GET_APIC_ID(apic_read(APIC_ID));
-
-    /* KAF: Need interrupts enabled for safe IPI. */
-    __sti();
-
-    if (reboot_smp) {
-
-        /* check to see if reboot_cpu is valid 
-           if its not, default to the BSP */
-        if ((reboot_cpu == -1) ||  
-            (reboot_cpu > (NR_CPUS -1))  || 
-            !(phys_cpu_present_map & (1<<cpuid))) 
-            reboot_cpu = boot_cpu_physical_apicid;
-
-        reboot_smp = 0;  /* use this as a flag to only go through this once*/
-        /* re-run this function on the other CPUs
-           it will fall though this section since we have 
-           cleared reboot_smp, and do the reboot if it is the
-           correct CPU, otherwise it halts. */
-        if (reboot_cpu != cpuid)
-            smp_call_function((void *)machine_restart , NULL, 1, 0);
-    }
-
-    /* if reboot_cpu is still -1, then we want a tradional reboot, 
-       and if we are not running on the reboot_cpu,, halt */
-    if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
-        for (;;)
-            __asm__ __volatile__ ("hlt");
-    }
-    /*
-     * Stop all CPUs and turn off local APICs and the IO-APIC, so
-     * other OSs see a clean IRQ state.
-     */
-    smp_send_stop();
-    disable_IO_APIC();
-#endif
-
-    if(!reboot_thru_bios) {
-        /* rebooting needs to touch the page at absolute addr 0 */
-        *((unsigned short *)__va(0x472)) = reboot_mode;
-        for (;;) {
-            int i;
-            for (i=0; i<100; i++) {
-                kb_wait();
-                udelay(50);
-                outb(0xfe,0x64);         /* pulse reset low */
-                udelay(50);
-            }
-            /* That didn't work - force a triple fault.. */
-            __asm__ __volatile__("lidt %0": "=m" (no_idt));
-            __asm__ __volatile__("int3");
-        }
-    }
-
-    panic("Need to reinclude BIOS reboot code\n");
-}
-
-void machine_halt(void)
-{
-    machine_restart(0);
-}
-
-void machine_power_off(void)
-{
-    machine_restart(0);
-}
-
-void new_thread(struct domain *p,
-                unsigned long start_pc,
-                unsigned long start_stack,
-                unsigned long start_info)
-{
-    execution_context_t *ec = &p->shared_info->execution_context;
-
-    /*
-     * Initial register values:
-     *  DS,ES,FS,GS = FLAT_RING1_DS
-     *       CS:EIP = FLAT_RING1_CS:start_pc
-     *       SS:ESP = FLAT_RING1_DS:start_stack
-     *          ESI = start_info
-     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
-     */
-    ec->ds = ec->es = ec->fs = ec->gs = ec->ss = FLAT_RING1_DS;
-    ec->cs = FLAT_RING1_CS;
-    ec->eip = start_pc;
-    ec->esp = start_stack;
-    ec->esi = start_info;
-
-    __save_flags(ec->eflags);
-    ec->eflags |= X86_EFLAGS_IF;
-
-    /* No fast trap at start of day. */
-    SET_DEFAULT_FAST_TRAP(&p->thread);
-}
-
-
-/*
- * This special macro can be used to load a debugging register
- */
-#define loaddebug(thread,register) \
-               __asm__("movl %0,%%db" #register  \
-                       : /* no output */ \
-                       :"r" (thread->debugreg[register]))
-
-
-void switch_to(struct domain *prev_p, struct domain *next_p)
-{
-    struct thread_struct *next = &next_p->thread;
-    struct tss_struct *tss = init_tss + smp_processor_id();
-    execution_context_t *stack_ec = get_execution_context();
-    int i;
-    
-    __cli();
-
-    /* Switch guest general-register state. */
-    if ( !is_idle_task(prev_p) )
-    {
-        memcpy(&prev_p->shared_info->execution_context, 
-               stack_ec, 
-               sizeof(*stack_ec));
-        unlazy_fpu(prev_p);
-        CLEAR_FAST_TRAP(&prev_p->thread);
-    }
-
-    if ( !is_idle_task(next_p) )
-    {
-        memcpy(stack_ec,
-               &next_p->shared_info->execution_context,
-               sizeof(*stack_ec));
-
-        /*
-         * This is sufficient! If the descriptor DPL differs from CS RPL then 
-         * we'll #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared 
-         * automatically. If SS RPL or DPL differs from CS RPL then we'll #GP.
-         */
-        if ( (stack_ec->cs & 3) == 0 )
-            stack_ec->cs = FLAT_RING1_CS;
-        if ( (stack_ec->ss & 3) == 0 )
-            stack_ec->ss = FLAT_RING1_DS;
-
-        SET_FAST_TRAP(&next_p->thread);
-
-        /* Switch the guest OS ring-1 stack. */
-        tss->esp1 = next->guestos_sp;
-        tss->ss1  = next->guestos_ss;
-
-        /* Maybe switch the debug registers. */
-        if ( unlikely(next->debugreg[7]) )
-        {
-            loaddebug(next, 0);
-            loaddebug(next, 1);
-            loaddebug(next, 2);
-            loaddebug(next, 3);
-            /* no 4 and 5 */
-            loaddebug(next, 6);
-            loaddebug(next, 7);
-        }
-
-        /* Switch page tables. */
-        write_ptbase(&next_p->mm);
-        tlb_clocktick();
-    }
-
-    if ( unlikely(prev_p->io_bitmap != NULL) || 
-         unlikely(next_p->io_bitmap != NULL) )
-    {
-        if ( next_p->io_bitmap != NULL )
-        {
-            /* Copy in the appropriate parts of the IO bitmap.  We use the
-             * selector to copy only the interesting parts of the bitmap. */
-
-            u64 old_sel = ~0ULL; /* IO bitmap selector for previous task. */
-
-            if ( prev_p->io_bitmap != NULL)
-            {
-                old_sel = prev_p->io_bitmap_sel;
-
-                /* Replace any areas of the IO bitmap that had bits cleared. */
-                for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
-                    if ( !test_bit(i, &prev_p->io_bitmap_sel) )
-                        memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
-                               &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
-                               IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
-            }
-
-            /* Copy in any regions of the new task's bitmap that have bits
-             * clear and we haven't already dealt with. */
-            for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
-            {
-                if ( test_bit(i, &old_sel)
-                     && !test_bit(i, &next_p->io_bitmap_sel) )
-                    memcpy(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
-                           &next_p->io_bitmap[i * IOBMP_SELBIT_LWORDS],
-                           IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
-            }
-
-            tss->bitmap = IO_BITMAP_OFFSET;
-
-       }
-        else
-        {
-            /* In this case, we're switching FROM a task with IO port access,
-             * to a task that doesn't use the IO bitmap.  We set any TSS bits
-             * that might have been cleared, ready for future use. */
-            for ( i = 0; i < sizeof(prev_p->io_bitmap_sel) * 8; i++ )
-                if ( !test_bit(i, &prev_p->io_bitmap_sel) )
-                    memset(&tss->io_bitmap[i * IOBMP_SELBIT_LWORDS],
-                           0xFF, IOBMP_SELBIT_LWORDS * sizeof(unsigned long));
-
-            /*
-             * a bitmap offset pointing outside of the TSS limit
-             * causes a nicely controllable SIGSEGV if a process
-             * tries to use a port IO instruction. The first
-             * sys_ioperm() call sets up the bitmap properly.
-             */
-            tss->bitmap = INVALID_IO_BITMAP_OFFSET;
-       }
-    }
-
-    set_current(next_p);
-
-    /* Switch GDT and LDT. */
-    __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->mm.gdt));
-    load_LDT(next_p);
-
-    __sti();
-}
-
-
-/* XXX Currently the 'domain' field is ignored! XXX */
-long do_iopl(domid_t domain, unsigned int new_io_pl)
-{
-    execution_context_t *ec = get_execution_context();
-    ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
-    return 0;
-}
index 2ef7af16b124d02de86a2517a74129cde5ed7dcf..d8c1b7dde5cf5e12d30c06cc5d39c5c67667ca2f 100644 (file)
@@ -6,22 +6,22 @@ asm(
 ".align  4\n"
 ".globl  __write_lock_failed\n"
 "__write_lock_failed:\n"
-"        " LOCK "addl    $" RW_LOCK_BIAS_STR ",(%eax)\n"
+"        " LOCK "addl    $" RW_LOCK_BIAS_STR ",(%"__OP"ax)\n"
 "1:      rep; nop\n"
-"        cmpl    $" RW_LOCK_BIAS_STR ",(%eax)\n"
+"        cmpl    $" RW_LOCK_BIAS_STR ",(%"__OP"ax)\n"
 "        jne     1b\n"
-"        " LOCK "subl    $" RW_LOCK_BIAS_STR ",(%eax)\n"
+"        " LOCK "subl    $" RW_LOCK_BIAS_STR ",(%"__OP"ax)\n"
 "        jnz     __write_lock_failed\n"
 "        ret\n"
 
 ".align  4\n"
 ".globl  __read_lock_failed\n"
 "__read_lock_failed:\n"
-"        lock ; incl     (%eax)\n"
+"        lock ; incl     (%"__OP"ax)\n"
 "1:      rep; nop\n"
-"        cmpl    $1,(%eax)\n"
+"        cmpl    $1,(%"__OP"ax)\n"
 "        js      1b\n"
-"        lock ; decl     (%eax)\n"
+"        lock ; decl     (%"__OP"ax)\n"
 "        js      __read_lock_failed\n"
 "        ret\n"
 );
index ed5f3bba6762a15833624b93e66b95cd1e6f6248..831a27e092214e048b12c9e7f04bb2b718704605 100644 (file)
@@ -54,6 +54,8 @@ int acpi_force __initdata = 0;
 int phys_proc_id[NR_CPUS];
 int logical_proc_id[NR_CPUS];
 
+#if defined(__i386__)
+
 /* Standard macro to see if a specific flag is changeable */
 static inline int flag_is_changeable_p(u32 flag)
 {
@@ -81,6 +83,12 @@ static int __init have_cpuid_p(void)
     return flag_is_changeable_p(X86_EFLAGS_ID);
 }
 
+#elif defined(__x86_64__)
+
+#define have_cpuid_p() (1)
+
+#endif
+
 void __init get_cpu_vendor(struct cpuinfo_x86 *c)
 {
     char *v = c->x86_vendor_id;
@@ -259,6 +267,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
 unsigned long cpu_initialized;
 void __init cpu_init(void)
 {
+#if defined(__i386__) /* XXX */
     int nr = smp_processor_id();
     struct tss_struct * t = &init_tss[nr];
 
@@ -297,6 +306,7 @@ void __init cpu_init(void)
     write_ptbase(&current->mm);
 
     init_idle_task();
+#endif
 }
 
 static void __init do_initcalls(void)
index 6a643ada2440556d627beb52cea49d103060dfa0..ef7a39df89b49a485de85b5aab8904eac584b8d0 100644 (file)
@@ -659,7 +659,9 @@ static void __init do_boot_cpu (int apicid)
 
     map_cpu_to_boot_apicid(cpu, apicid);
 
+#if defined(__i386__)
     SET_DEFAULT_FAST_TRAP(&idle->thread);
+#endif
 
     idle_task[cpu] = idle;
 
index dec7618ed7537a983dddf5d5b132b30d1098f23c..dfe20e415d33faad6fe45a132d24d7caead8c648 100644 (file)
@@ -107,7 +107,8 @@ static struct irqaction irq0 = { timer_interrupt, "timer", NULL};
 
 static unsigned long __init calibrate_tsc(void)
 {
-    unsigned long startlow, starthigh, endlow, endhigh, count;
+    u64 start, end, diff;
+    unsigned long count;
 
     /* Set the Gate high, disable speaker */
     outb((inb(0x61) & ~0x02) | 0x01, 0x61);
@@ -123,22 +124,24 @@ static unsigned long __init calibrate_tsc(void)
     outb(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */
     outb(CALIBRATE_LATCH >> 8, 0x42);   /* MSB of count */
 
-    rdtsc(startlow, starthigh);
+    rdtscll(start);
     for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
         continue;
-    rdtsc(endlow, endhigh);
+    rdtscll(end);
 
     /* Error if the CTC doesn't behave itself. */
     if ( count == 0 )
         return 0;
 
-    /* [endhigh:endlow] = [endhigh:endlow] - [starthigh:startlow] */
-    __asm__( "subl %2,%0 ; sbbl %3,%1"
-             : "=a" (endlow), "=d" (endhigh)
-             : "g" (startlow), "g" (starthigh), "0" (endlow), "1" (endhigh) );
+    diff = end - start;
 
+#if defined(__i386__)
     /* If quotient doesn't fit in 32 bits then we return error (zero). */
-    return endhigh ? 0 : endlow;
+    if ( diff & ~0xffffffffULL )
+        return 0;
+#endif
+
+    return (unsigned long)diff;
 }
 
 
index 064159046dc6b06511340d4b39fec664e7d126c5..02018f347a7f5bb7aa2d7557f0c44e1ebfb6fa53 100644 (file)
@@ -43,7 +43,11 @@ r_base = .
        lmsw    %ax             # into protected mode
        jmp     flush_instr
 flush_instr:
-       ljmpl   $__HYPERVISOR_CS, $0x100000 # 1MB
+#if defined(__x86_64__)
+       ljmpl   $__HYPERVISOR_CS32, $0x100000 # 1MB
+#else        
+       ljmpl   $__HYPERVISOR_CS,   $0x100000 # 1MB
+#endif
 
 idt_48:
        .word   0                       # idt limit = 0
index 59d0e320d6f480711b3354d1c78c4e18faafc6f4..c43aafea23822b625f251b93153d8ee961a354fa 100644 (file)
@@ -62,6 +62,8 @@ struct guest_trap_bounce {
     unsigned long  eip;               /*  12 */
 } guest_trap_bounce[NR_CPUS] = { { 0 } };
 
+#if defined(__i386__)
+
 #define DOUBLEFAULT_STACK_SIZE 1024
 static struct tss_struct doublefault_tss;
 static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];
@@ -906,3 +908,5 @@ unsigned long do_get_debugreg(int reg)
     if ( (reg < 0) || (reg > 7) ) return -EINVAL;
     return current->thread.debugreg[reg];
 }
+
+#endif /* __i386__ */
diff --git a/xen/arch/x86/usercopy.c b/xen/arch/x86/usercopy.c
deleted file mode 100644 (file)
index dc2d34c..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-/* 
- * User address space access functions.
- * The non inlined parts of asm-i386/uaccess.h are here.
- *
- * Copyright 1997 Andi Kleen <ak@muc.de>
- * Copyright 1997 Linus Torvalds
- */
-#include <xen/config.h>
-#include <asm/uaccess.h>
-//#include <asm/mmx.h>
-
-#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
-
-unsigned long
-__generic_copy_to_user(void *to, const void *from, unsigned long n)
-{
-       if (access_ok(VERIFY_WRITE, to, n))
-       {
-               if(n<512)
-                       __copy_user(to,from,n);
-               else
-                       mmx_copy_user(to,from,n);
-       }
-       return n;
-}
-
-unsigned long
-__generic_copy_from_user(void *to, const void *from, unsigned long n)
-{
-       if (access_ok(VERIFY_READ, from, n))
-       {
-               if(n<512)
-                       __copy_user_zeroing(to,from,n);
-               else
-                       mmx_copy_user_zeroing(to, from, n);
-       }
-       else
-               memset(to, 0, n);
-       return n;
-}
-
-#else
-
-unsigned long
-__generic_copy_to_user(void *to, const void *from, unsigned long n)
-{
-       prefetch(from);
-       if (access_ok(VERIFY_WRITE, to, n))
-               __copy_user(to,from,n);
-       return n;
-}
-
-unsigned long
-__generic_copy_from_user(void *to, const void *from, unsigned long n)
-{
-       prefetchw(to);
-       if (access_ok(VERIFY_READ, from, n))
-               __copy_user_zeroing(to,from,n);
-       else
-               memset(to, 0, n);
-       return n;
-}
-
-#endif
-
-/*
- * Copy a null terminated string from userspace.
- */
-
-#define __do_strncpy_from_user(dst,src,count,res)                         \
-do {                                                                      \
-       int __d0, __d1, __d2;                                              \
-       __asm__ __volatile__(                                              \
-               "       testl %1,%1\n"                                     \
-               "       jz 2f\n"                                           \
-               "0:     lodsb\n"                                           \
-               "       stosb\n"                                           \
-               "       testb %%al,%%al\n"                                 \
-               "       jz 1f\n"                                           \
-               "       decl %1\n"                                         \
-               "       jnz 0b\n"                                          \
-               "1:     subl %1,%0\n"                                      \
-               "2:\n"                                                     \
-               ".section .fixup,\"ax\"\n"                                 \
-               "3:     movl %5,%0\n"                                      \
-               "       jmp 2b\n"                                          \
-               ".previous\n"                                              \
-               ".section __ex_table,\"a\"\n"                              \
-               "       .align 4\n"                                        \
-               "       .long 0b,3b\n"                                     \
-               ".previous"                                                \
-               : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),      \
-                 "=&D" (__d2)                                             \
-               : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
-               : "memory");                                               \
-} while (0)
-
-long
-__strncpy_from_user(char *dst, const char *src, long count)
-{
-       long res;
-       __do_strncpy_from_user(dst, src, count, res);
-       return res;
-}
-
-long
-strncpy_from_user(char *dst, const char *src, long count)
-{
-       long res = -EFAULT;
-       if (access_ok(VERIFY_READ, src, 1))
-               __do_strncpy_from_user(dst, src, count, res);
-       return res;
-}
-
-
-/*
- * Zero Userspace
- */
-
-#define __do_clear_user(addr,size)                                     \
-do {                                                                   \
-       int __d0;                                                       \
-       __asm__ __volatile__(                                           \
-               "0:     rep; stosl\n"                                   \
-               "       movl %2,%0\n"                                   \
-               "1:     rep; stosb\n"                                   \
-               "2:\n"                                                  \
-               ".section .fixup,\"ax\"\n"                              \
-               "3:     lea 0(%2,%0,4),%0\n"                            \
-               "       jmp 2b\n"                                       \
-               ".previous\n"                                           \
-               ".section __ex_table,\"a\"\n"                           \
-               "       .align 4\n"                                     \
-               "       .long 0b,3b\n"                                  \
-               "       .long 1b,2b\n"                                  \
-               ".previous"                                             \
-               : "=&c"(size), "=&D" (__d0)                             \
-               : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));     \
-} while (0)
-
-unsigned long
-clear_user(void *to, unsigned long n)
-{
-       if (access_ok(VERIFY_WRITE, to, n))
-               __do_clear_user(to, n);
-       return n;
-}
-
-unsigned long
-__clear_user(void *to, unsigned long n)
-{
-       __do_clear_user(to, n);
-       return n;
-}
-
-/*
- * Return the size of a string (including the ending 0)
- *
- * Return 0 on exception, a value greater than N if too long
- */
-
-long strnlen_user(const char *s, long n)
-{
-       unsigned long mask = -__addr_ok(s);
-       unsigned long res, tmp;
-
-       __asm__ __volatile__(
-               "       testl %0, %0\n"
-               "       jz 3f\n"
-               "       andl %0,%%ecx\n"
-               "0:     repne; scasb\n"
-               "       setne %%al\n"
-               "       subl %%ecx,%0\n"
-               "       addl %0,%%eax\n"
-               "1:\n"
-               ".section .fixup,\"ax\"\n"
-               "2:     xorl %%eax,%%eax\n"
-               "       jmp 1b\n"
-               "3:     movb $1,%%al\n"
-               "       jmp 1b\n"
-               ".previous\n"
-               ".section __ex_table,\"a\"\n"
-               "       .align 4\n"
-               "       .long 0b,2b\n"
-               ".previous"
-               :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
-               :"0" (n), "1" (s), "2" (0), "3" (mask)
-               :"cc");
-       return res & mask;
-}
diff --git a/xen/arch/x86/x86_32/domain_page.c b/xen/arch/x86/x86_32/domain_page.c
new file mode 100644 (file)
index 0000000..23b29a0
--- /dev/null
@@ -0,0 +1,81 @@
+/******************************************************************************
+ * domain_page.c
+ * 
+ * Allow temporary mapping of domain pages. Based on ideas from the
+ * Linux PKMAP code -- the copyrights and credits are retained below.
+ */
+
+/*
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/mm.h>
+#include <xen/perfc.h>
+#include <asm/domain_page.h>
+#include <asm/flushtlb.h>
+
+unsigned long *mapcache;
+static unsigned int map_idx, shadow_map_idx[NR_CPUS];
+static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
+
+/* Use a spare PTE bit to mark entries ready for recycling. */
+#define READY_FOR_TLB_FLUSH (1<<10)
+
+static void flush_all_ready_maps(void)
+{
+    unsigned long *cache = mapcache;
+
+    /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
+    do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
+    while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
+
+    perfc_incrc(domain_page_tlb_flush);
+    local_flush_tlb();
+}
+
+
+void *map_domain_mem(unsigned long pa)
+{
+    unsigned long va;
+    unsigned int idx, cpu = smp_processor_id();
+    unsigned long *cache = mapcache;
+    unsigned long flags;
+
+    perfc_incrc(map_domain_mem_count);
+
+    spin_lock_irqsave(&map_lock, flags);
+
+    /* Has some other CPU caused a wrap? We must flush if so. */
+    if ( map_idx < shadow_map_idx[cpu] )
+    {
+        perfc_incrc(domain_page_tlb_flush);
+        local_flush_tlb();
+    }
+
+    for ( ; ; )
+    {
+        idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
+        if ( idx == 0 ) flush_all_ready_maps();
+        if ( cache[idx] == 0 ) break;
+    }
+
+    cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
+
+    spin_unlock_irqrestore(&map_lock, flags);
+
+    shadow_map_idx[cpu] = idx;
+
+    va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
+    return (void *)va;
+}
+
+void unmap_domain_mem(void *va)
+{
+    unsigned int idx;
+    idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
+    mapcache[idx] |= READY_FOR_TLB_FLUSH;
+}
diff --git a/xen/arch/x86/x86_32/entry.S b/xen/arch/x86/x86_32/entry.S
new file mode 100644 (file)
index 0000000..f3b6885
--- /dev/null
@@ -0,0 +1,696 @@
+/*
+ * Hypercall and fault low-level handling routines.
+ *
+ * Copyright (c) 2002-2004, K A Fraser
+ * Copyright (c) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * The idea for callbacks to guest OSes
+ * ====================================
+ *
+ * First, we require that all callbacks (either via a supplied
+ * interrupt-descriptor-table, or via the special event or failsafe callbacks
+ * in the shared-info-structure) are to ring 1. This just makes life easier,
+ * in that it means we don't have to do messy GDT/LDT lookups to find
+ * out the privilege level of the return code-selector. That code
+ * would just be a hassle to write, and would need to account for running
+ * off the end of the GDT/LDT, for example. For all callbacks we check
+ * that the provided
+ * return CS is not == __HYPERVISOR_{CS,DS}. Apart from that we're safe as
+ * don't allow a guest OS to install ring-0 privileges into the GDT/LDT.
+ * It's up to the guest OS to ensure all returns via the IDT are to ring 1.
+ * If not, we load incorrect SS/ESP values from the TSS (for ring 1 rather
+ * than the correct ring) and bad things are bound to ensue -- IRET is
+ * likely to fault, and we may end up killing the domain (no harm can
+ * come to Xen, though).
+ *      
+ * When doing a callback, we check if the return CS is in ring 0. If so,
+ * callback is delayed until next return to ring != 0.
+ * If return CS is in ring 1, then we create a callback frame
+ * starting at return SS/ESP. The base of the frame does an intra-privilege
+ * interrupt-return.
+ * If return CS is in ring > 1, we create a callback frame starting
+ * at SS/ESP taken from appropriate section of the current TSS. The base
+ * of the frame does an inter-privilege interrupt-return.
+ * 
+ * Note that the "failsafe callback" uses a special stackframe:
+ * { return_DS, return_ES, return_FS, return_GS, return_EIP,
+ *   return_CS, return_EFLAGS[, return_ESP, return_SS] }
+ * That is, original values for DS/ES/FS/GS are placed on stack rather than
+ * in DS/ES/FS/GS themselves. Why? It saves us loading them, only to have them
+ * saved/restored in guest OS. Furthermore, if we load them we may cause
+ * a fault if they are invalid, which is a hassle to deal with. We avoid
+ * that problem if we don't load them :-) This property allows us to use
+ * the failsafe callback as a fallback: if we ever fault on loading DS/ES/FS/GS
+ * on return to ring != 0, we can simply package it up as a return via
+ * the failsafe callback, and let the guest OS sort it out (perhaps by
+ * killing an application process). Note that we also do this for any
+ * faulting IRET -- just let the guest OS handle it via the event
+ * callback.
+ *
+ * We terminate a domain in the following cases:
+ *  - creating a callback stack frame (due to bad ring-1 stack).
+ *  - faulting IRET on entry to failsafe callback handler.
+ * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
+ * handler in good order (absolutely no faults allowed!).
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <hypervisor-ifs/hypervisor-if.h>
+
+EBX            = 0x00
+ECX            = 0x04
+EDX            = 0x08
+ESI            = 0x0C
+EDI            = 0x10
+EBP            = 0x14
+EAX            = 0x18
+DS             = 0x1C
+ES             = 0x20
+FS              = 0x24
+GS              = 0x28
+ORIG_EAX       = 0x2C
+EIP            = 0x30
+CS             = 0x34
+EFLAGS         = 0x38
+OLDESP         = 0x3C
+OLDSS          = 0x40
+
+/* Offsets in domain structure */
+PROCESSOR       =  0
+SHARED_INFO     =  4
+EVENT_SEL       =  8
+EVENT_ADDR      = 12
+FAILSAFE_BUFFER = 16
+FAILSAFE_SEL    = 32
+FAILSAFE_ADDR   = 36
+
+/* Offsets in shared_info_t */
+#define UPCALL_PENDING /* 0 */
+#define UPCALL_MASK       1
+
+/* Offsets in guest_trap_bounce */
+GTB_ERROR_CODE   =  0
+GTB_CR2          =  4
+GTB_FLAGS        =  8
+GTB_CS           = 10
+GTB_EIP          = 12
+GTBF_TRAP        =  1
+GTBF_TRAP_NOCODE =  2
+GTBF_TRAP_CR2    =  4
+                        
+CF_MASK                = 0x00000001
+IF_MASK                = 0x00000200
+NT_MASK                = 0x00004000
+        
+#define SAVE_ALL_NOSEGREGS \
+        cld; \
+        pushl %gs; \
+        pushl %fs; \
+        pushl %es; \
+        pushl %ds; \
+        pushl %eax; \
+        pushl %ebp; \
+        pushl %edi; \
+        pushl %esi; \
+        pushl %edx; \
+        pushl %ecx; \
+        pushl %ebx; \
+
+#define SAVE_ALL \
+        SAVE_ALL_NOSEGREGS \
+        movl $(__HYPERVISOR_DS),%edx; \
+        movl %edx,%ds; \
+        movl %edx,%es; \
+        movl %edx,%fs; \
+        movl %edx,%gs; \
+        sti;
+
+#define GET_CURRENT(reg)   \
+        movl $4096-4, reg; \
+        orl  %esp, reg;    \
+        andl $~3,reg;      \
+        movl (reg),reg;
+
+ENTRY(continue_nonidle_task)
+        GET_CURRENT(%ebx)
+        jmp test_all_events
+
+        ALIGN
+/*
+ * HYPERVISOR_multicall(call_list, nr_calls)
+ *   Execute a list of 'nr_calls' hypercalls, pointed at by 'call_list'.
+ *   This is fairly easy except that:
+ *   1. We may fault reading the call list, and must patch that up; and
+ *   2. We cannot recursively call HYPERVISOR_multicall, or a malicious
+ *      caller could cause our stack to blow up.
+ */
+do_multicall:
+        popl  %eax
+        cmpl  $SYMBOL_NAME(multicall_return_from_call),%eax
+        je    multicall_return_from_call
+        pushl %ebx
+        movl  4(%esp),%ebx   /* EBX == call_list */
+        movl  8(%esp),%ecx   /* ECX == nr_calls  */
+multicall_loop:
+        pushl %ecx
+multicall_fault1: 
+        pushl 20(%ebx)      # args[4]
+multicall_fault2: 
+        pushl 16(%ebx)      # args[3]
+multicall_fault3: 
+        pushl 12(%ebx)      # args[2]
+multicall_fault4: 
+        pushl 8(%ebx)       # args[1]
+multicall_fault5: 
+        pushl 4(%ebx)       # args[0]
+multicall_fault6: 
+        movl  (%ebx),%eax   # op
+        andl  $(NR_hypercalls-1),%eax
+        call  *SYMBOL_NAME(hypercall_table)(,%eax,4)
+multicall_return_from_call:
+multicall_fault7:
+        movl  %eax,24(%ebx) # args[5] == result
+        addl  $20,%esp
+        popl  %ecx
+        addl  $(ARGS_PER_MULTICALL_ENTRY*4),%ebx
+        loop  multicall_loop
+        popl  %ebx
+        xorl  %eax,%eax
+        jmp   ret_from_hypercall
+
+.section __ex_table,"a"
+        .align 4
+        .long multicall_fault1, multicall_fixup1
+        .long multicall_fault2, multicall_fixup2
+        .long multicall_fault3, multicall_fixup3
+        .long multicall_fault4, multicall_fixup4
+        .long multicall_fault5, multicall_fixup5
+        .long multicall_fault6, multicall_fixup6
+.previous
+               
+.section .fixup,"ax"
+multicall_fixup6: 
+        addl  $4,%esp
+multicall_fixup5: 
+        addl  $4,%esp
+multicall_fixup4: 
+        addl  $4,%esp
+multicall_fixup3: 
+        addl  $4,%esp
+multicall_fixup2: 
+        addl  $4,%esp
+multicall_fixup1:
+        addl  $4,%esp
+        popl  %ebx
+        movl  $-EFAULT,%eax
+        jmp   ret_from_hypercall
+.previous        
+                
+        ALIGN
+restore_all_guest:
+        # First, may need to restore %ds if clobbered by create_bounce_frame
+        pushl %ss
+        popl  %ds
+        # Second, create a failsafe copy of DS,ES,FS,GS in case any are bad
+        leal  DS(%esp),%esi
+        leal  FAILSAFE_BUFFER(%ebx),%edi
+        movsl
+        movsl
+        movsl
+        movsl
+        # Finally, restore guest registers -- faults will cause failsafe
+        popl %ebx
+       popl %ecx
+       popl %edx
+       popl %esi
+       popl %edi
+       popl %ebp
+       popl %eax
+1:     popl %ds
+2:     popl %es
+3:     popl %fs
+4:     popl %gs
+        addl $4,%esp
+5:      iret
+.section .fixup,"ax"
+10:     subl $4,%esp
+        pushl %gs
+9:      pushl %fs
+8:      pushl %es
+7:      pushl %ds
+6:      pushl %eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       pushl %ecx
+       pushl %ebx
+       pushl %ss
+       popl  %ds
+       pushl %ss
+       popl  %es
+       jmp  failsafe_callback
+.previous
+.section __ex_table,"a"
+       .align 4
+       .long 1b,6b
+       .long 2b,7b
+       .long 3b,8b
+       .long 4b,9b
+       .long 5b,10b
+.previous
+
+/* No special register assumptions */
+failsafe_callback:
+        GET_CURRENT(%ebx)
+        movl PROCESSOR(%ebx),%eax
+        shl  $4,%eax
+        lea  guest_trap_bounce(%eax),%edx
+        movl FAILSAFE_ADDR(%ebx),%eax
+        movl %eax,GTB_EIP(%edx)
+        movl FAILSAFE_SEL(%ebx),%eax
+        movw %ax,GTB_CS(%edx)
+        call create_bounce_frame
+        subl $16,%esi                # add DS/ES/FS/GS to failsafe stack frame
+        leal FAILSAFE_BUFFER(%ebx),%ebp
+        movl  0(%ebp),%eax           # DS
+FAULT1: movl %eax,(%esi) 
+        movl  4(%ebp),%eax           # ES
+FAULT2: movl %eax,4(%esi)
+        movl  8(%ebp),%eax           # FS
+FAULT3: movl %eax,8(%esi) 
+        movl 12(%ebp),%eax           # GS
+FAULT4: movl %eax,12(%esi)
+        movl %esi,OLDESP(%esp)
+        popl %ebx
+        popl %ecx
+        popl %edx
+        popl %esi
+        popl %edi
+        popl %ebp
+        popl %eax
+        addl $20,%esp                # skip DS/ES/FS/GS/ORIG_EAX
+FAULT5: iret 
+
+
+        ALIGN
+# Simple restore -- we should never fault as we will only interrupt ring 0
+# when sane values have been placed in all registers. The only exception is
+# NMI, which may interrupt before good values have been placed in DS-GS.
+# The NMI return code deals with this problem itself.
+restore_all_xen:
+       popl %ebx
+       popl %ecx
+       popl %edx
+       popl %esi
+       popl %edi
+       popl %ebp
+       popl %eax
+       popl %ds
+       popl %es
+       popl %fs
+       popl %gs
+        addl $4,%esp
+        iret
+
+        ALIGN
+ENTRY(hypercall)
+        pushl %eax                     # save orig_eax
+       SAVE_ALL
+       GET_CURRENT(%ebx)
+       andl $(NR_hypercalls-1),%eax
+       call *SYMBOL_NAME(hypercall_table)(,%eax,4)
+
+ret_from_hypercall:
+        movl %eax,EAX(%esp)            # save the return value
+
+test_all_events:
+        xorl %ecx,%ecx
+        notl %ecx
+        cli                             # tests must not race interrupts
+/*test_softirqs:*/  
+        movl PROCESSOR(%ebx),%eax
+        shl  $6,%eax                    # sizeof(irq_cpustat) == 64
+        test %ecx,SYMBOL_NAME(irq_stat)(%eax,1)
+        jnz  process_softirqs
+/*test_guest_events:*/
+        movl SHARED_INFO(%ebx),%eax
+        testb $0xFF,UPCALL_MASK(%eax)
+        jnz  restore_all_guest
+        testb $0xFF,UPCALL_PENDING(%eax)
+        jz   restore_all_guest
+        movb $1,UPCALL_MASK(%eax)       # Upcalls are masked during delivery
+/*process_guest_events:*/
+        movl PROCESSOR(%ebx),%edx
+        shl  $4,%edx                    # sizeof(guest_trap_bounce) == 16
+        lea  guest_trap_bounce(%edx),%edx
+        movl EVENT_ADDR(%ebx),%eax
+        movl %eax,GTB_EIP(%edx)
+        movl EVENT_SEL(%ebx),%eax
+        movw %ax,GTB_CS(%edx)
+        call create_bounce_frame
+        jmp  restore_all_guest
+
+        ALIGN
+process_softirqs:
+        sti       
+        call SYMBOL_NAME(do_softirq)
+        jmp  test_all_events
+                
+/* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:         */
+/*   {EIP, CS, EFLAGS, [ESP, SS]}                                     */
+/* %edx == guest_trap_bounce, %ebx == task_struct                     */
+/* %eax,%ecx are clobbered. %ds:%esi contain new OLDSS/OLDESP.        */
+create_bounce_frame:        
+        mov  CS+4(%esp),%cl
+        test $2,%cl
+        jz   1f /* jump if returning to an existing ring-1 activation */
+        /* obtain ss/esp from TSS -- no current ring-1 activations */
+        movl PROCESSOR(%ebx),%eax
+        /* next 4 lines multiply %eax by 8320, which is sizeof(tss_struct) */
+        movl %eax, %ecx
+        shll $7, %ecx
+        shll $13, %eax
+        addl %ecx,%eax
+        addl $init_tss + 12,%eax
+        movl (%eax),%esi /* tss->esp1 */
+FAULT6: movl 4(%eax),%ds /* tss->ss1  */
+        /* base of stack frame must contain ss/esp (inter-priv iret) */
+        subl $8,%esi
+        movl OLDESP+4(%esp),%eax
+FAULT7: movl %eax,(%esi) 
+        movl OLDSS+4(%esp),%eax
+FAULT8: movl %eax,4(%esi) 
+        jmp 2f
+1:      /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
+        movl OLDESP+4(%esp),%esi
+FAULT9: movl OLDSS+4(%esp),%ds 
+2:      /* Construct a stack frame: EFLAGS, CS/EIP */
+        subl $12,%esi
+        movl EIP+4(%esp),%eax
+FAULT10:movl %eax,(%esi) 
+        movl CS+4(%esp),%eax
+FAULT11:movl %eax,4(%esi) 
+        movl EFLAGS+4(%esp),%eax
+FAULT12:movl %eax,8(%esi)
+        /* Rewrite our stack frame and return to ring 1. */
+        /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
+        andl $0xfffcbeff,%eax
+        movl %eax,EFLAGS+4(%esp)
+        movl %ds,OLDSS+4(%esp)
+        movl %esi,OLDESP+4(%esp)
+        movzwl %es:GTB_CS(%edx),%eax
+        movl %eax,CS+4(%esp)
+        movl %es:GTB_EIP(%edx),%eax
+        movl %eax,EIP+4(%esp)
+        ret
+        
+                              
+.section __ex_table,"a"
+        .align 4
+        .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret
+        .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector
+        .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector
+        .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack
+        .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack
+        .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack
+.previous
+               
+# This handler kills domains which experience unrecoverable faults.
+.section .fixup,"ax"
+crash_domain_fixup1:
+        subl  $4,%esp
+        SAVE_ALL
+        jmp   domain_crash
+crash_domain_fixup2:
+        addl  $4,%esp                     
+crash_domain_fixup3:
+        pushl %ss
+        popl  %ds
+        jmp   domain_crash
+.previous
+
+        ALIGN
+process_guest_exception_and_events:        
+        movl PROCESSOR(%ebx),%eax
+        shl  $4,%eax
+        lea  guest_trap_bounce(%eax),%edx
+        testb $~0,GTB_FLAGS(%edx)
+        jz   test_all_events
+        call create_bounce_frame        # just the basic frame
+        mov  %es:GTB_FLAGS(%edx),%cl
+        test $GTBF_TRAP_NOCODE,%cl
+        jnz  2f
+        subl $4,%esi                    # push error_code onto guest frame
+        movl %es:GTB_ERROR_CODE(%edx),%eax
+FAULT13:movl %eax,(%esi)
+        test $GTBF_TRAP_CR2,%cl
+        jz   1f
+        subl $4,%esi                    # push %cr2 onto guest frame
+        movl %es:GTB_CR2(%edx),%eax
+FAULT14:movl %eax,(%esi)
+1:      movl %esi,OLDESP(%esp)        
+2:      push %es                        # unclobber %ds
+        pop  %ds 
+        movb $0,GTB_FLAGS(%edx)
+        jmp  test_all_events
+
+        ALIGN
+ENTRY(ret_from_intr)
+       GET_CURRENT(%ebx)
+        movb CS(%esp),%al
+       testb $3,%al    # return to non-supervisor?
+       jne test_all_events
+       jmp restore_all_xen
+
+ENTRY(divide_error)
+       pushl $0                # no error code
+       pushl $ SYMBOL_NAME(do_divide_error)
+       ALIGN
+error_code:
+       pushl %fs
+       pushl %es
+       pushl %ds
+       pushl %eax
+       xorl  %eax,%eax
+       pushl %ebp
+       pushl %edi
+       pushl %esi
+       pushl %edx
+       decl  %eax                      # eax = -1
+       pushl %ecx
+       pushl %ebx
+       cld
+       movl  %gs,%ecx
+       movl  ORIG_EAX(%esp), %esi      # get the error code
+       movl  GS(%esp), %edi            # get the function address
+       movl  %eax, ORIG_EAX(%esp)
+       movl  %ecx, GS(%esp)
+       movl  $(__HYPERVISOR_DS),%edx
+       movl  %edx,%ds
+       movl  %edx,%es
+       movl  %edx,%fs
+       movl  %edx,%gs
+       movl  %esp,%edx
+       pushl %esi                      # push the error code
+       pushl %edx                      # push the pt_regs pointer
+       GET_CURRENT(%ebx)
+       call  *%edi
+        addl  $8,%esp
+        movb  CS(%esp),%al
+       testb $3,%al
+       je    restore_all_xen
+        jmp   process_guest_exception_and_events
+
+# Individual trap entry points. Stubs for traps where the CPU pushes no
+# error code push a dummy 0 so error_code sees a uniform frame; for
+# invalid_TSS, segment_not_present, stack_segment, general_protection,
+# alignment_check and page_fault the CPU has already pushed a real one.
+ENTRY(coprocessor_error)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_coprocessor_error)
+       jmp error_code
+
+ENTRY(simd_coprocessor_error)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_simd_coprocessor_error)
+       jmp error_code
+
+# Device-not-available (#NM) routes straight to math_state_restore via
+# the common error_code path.
+ENTRY(device_not_available)
+       pushl $0
+        pushl $SYMBOL_NAME(math_state_restore)
+        jmp   error_code
+
+ENTRY(debug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_debug)
+       jmp error_code
+
+ENTRY(int3)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_int3)
+       jmp error_code
+
+ENTRY(overflow)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_overflow)
+       jmp error_code
+
+ENTRY(bounds)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_bounds)
+       jmp error_code
+
+ENTRY(invalid_op)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_invalid_op)
+       jmp error_code
+
+ENTRY(coprocessor_segment_overrun)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun)
+       jmp error_code
+
+ENTRY(invalid_TSS)
+       pushl $ SYMBOL_NAME(do_invalid_TSS)
+       jmp error_code
+
+ENTRY(segment_not_present)
+       pushl $ SYMBOL_NAME(do_segment_not_present)
+       jmp error_code
+
+ENTRY(stack_segment)
+       pushl $ SYMBOL_NAME(do_stack_segment)
+       jmp error_code
+
+ENTRY(general_protection)
+       pushl $ SYMBOL_NAME(do_general_protection)
+       jmp error_code
+
+ENTRY(alignment_check)
+       pushl $ SYMBOL_NAME(do_alignment_check)
+       jmp error_code
+
+ENTRY(page_fault)
+       pushl $ SYMBOL_NAME(do_page_fault)
+       jmp error_code
+
+ENTRY(machine_check)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_machine_check)
+       jmp error_code
+
+ENTRY(spurious_interrupt_bug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+       jmp error_code
+
+ENTRY(nmi)
+        # Save state but do not trash the segment registers!
+        # We may otherwise be unable to reload them or copy them to ring 1. 
+       pushl %eax
+       SAVE_ALL_NOSEGREGS
+
+        # Check for hardware problems. These are always fatal so we can
+        # reload DS and ES when handling them.
+        inb   $0x61,%al         # system control port B
+        testb $0x80,%al         # bit 7: memory parity error
+        jne   nmi_parity_err
+        testb $0x40,%al         # bit 6: I/O channel check
+        jne   nmi_io_err
+        movl  %eax,%ebx         # stash port contents as the NMI reason
+        
+        # Okay, its almost a normal NMI tick. We can only process it if:
+        #  A. We are the outermost Xen activation (in which case we have
+        #     the selectors safely saved on our stack)
+        #  B. DS-GS all contain sane Xen values.
+        # In all other cases we bail without touching DS-GS, as we have
+        # interrupted an enclosing Xen activation in tricky prologue or
+        # epilogue code.
+        movb  CS(%esp),%al
+       testb $3,%al
+        jne   do_watchdog_tick
+        movl  DS(%esp),%eax
+        cmpw  $(__HYPERVISOR_DS),%ax
+        jne   nmi_badseg
+        movl  ES(%esp),%eax
+        cmpw  $(__HYPERVISOR_DS),%ax
+        jne   nmi_badseg
+        movl  FS(%esp),%eax
+        cmpw  $(__HYPERVISOR_DS),%ax
+        jne   nmi_badseg
+        movl  GS(%esp),%eax
+        cmpw  $(__HYPERVISOR_DS),%ax
+        jne   nmi_badseg
+
+do_watchdog_tick:
+        # Safe to load Xen data selectors here (verified above).
+        movl  $(__HYPERVISOR_DS),%edx
+        movl  %edx,%ds
+        movl  %edx,%es
+        movl  %esp,%edx
+       pushl %ebx   # reason
+       pushl %edx   # regs
+        call  SYMBOL_NAME(do_nmi)
+       addl  $8,%esp
+        movb  CS(%esp),%al
+       testb $3,%al
+       je    restore_all_xen
+        GET_CURRENT(%ebx)
+        jmp   restore_all_guest
+
+nmi_badseg:
+        # Restore only the GPRs, then skip the remaining frame slots
+        # (20 bytes -- presumably the four segment slots plus ORIG_EAX,
+        # which were never saved; verify against SAVE_ALL_NOSEGREGS) and
+        # iret without touching any segment register.
+       popl %ebx
+       popl %ecx
+       popl %edx
+       popl %esi
+       popl %edi
+       popl %ebp
+       popl %eax
+        addl $20,%esp
+        iret
+
+nmi_parity_err: 
+        # Fatal path: fine to clobber DS/ES before reporting.
+        movl $(__HYPERVISOR_DS),%edx
+        movl %edx,%ds
+        movl %edx,%es
+        jmp  SYMBOL_NAME(mem_parity_error)
+        
+nmi_io_err: 
+        movl $(__HYPERVISOR_DS),%edx
+        movl %edx,%ds
+        movl %edx,%es
+        jmp  SYMBOL_NAME(io_check_error)                        
+        
+.data
+# Hypercall dispatch table, indexed by hypercall number. Slots beyond
+# the last implemented call (20) are padded out to NR_hypercalls with
+# do_ni_hypercall ("not implemented").
+ENTRY(hypercall_table)
+        .long SYMBOL_NAME(do_set_trap_table)     /*  0 */
+        .long SYMBOL_NAME(do_mmu_update)
+        .long SYMBOL_NAME(do_set_gdt)
+        .long SYMBOL_NAME(do_stack_switch)
+        .long SYMBOL_NAME(do_set_callbacks)
+        .long SYMBOL_NAME(do_fpu_taskswitch)     /*  5 */
+        .long SYMBOL_NAME(do_sched_op)
+        .long SYMBOL_NAME(do_dom0_op)
+        .long SYMBOL_NAME(do_set_debugreg)
+        .long SYMBOL_NAME(do_get_debugreg)
+        .long SYMBOL_NAME(do_update_descriptor)  /* 10 */
+        .long SYMBOL_NAME(do_set_fast_trap)
+        .long SYMBOL_NAME(do_dom_mem_op)
+        .long SYMBOL_NAME(do_multicall)
+        .long SYMBOL_NAME(do_update_va_mapping)
+        .long SYMBOL_NAME(do_set_timer_op)       /* 15 */
+        .long SYMBOL_NAME(do_event_channel_op)
+        .long SYMBOL_NAME(do_xen_version)
+        .long SYMBOL_NAME(do_console_io)
+        .long SYMBOL_NAME(do_physdev_op)
+        .long SYMBOL_NAME(do_update_va_mapping_otherdomain) /* 20 */
+        .rept NR_hypercalls-((.-hypercall_table)/4)
+        .long SYMBOL_NAME(do_ni_hypercall)
+        .endr
diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c
new file mode 100644 (file)
index 0000000..59e304e
--- /dev/null
@@ -0,0 +1,412 @@
+/******************************************************************************
+ * xen/arch/x86/x86_32/mm.c
+ * 
+ * Modifications to Linux original are copyright (c) 2002-2003, K A Fraser
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/mm.h>
+#include <asm/page.h>
+#include <asm/flushtlb.h>
+#include <asm/fixmap.h>
+#include <asm/domain_page.h>
+
+/*
+ * Install 'entry' as the idle-page-table L1 PTE that maps 'vaddr',
+ * then flush just that virtual address from the TLB.
+ */
+static inline void set_pte_phys(unsigned long vaddr,
+                                l1_pgentry_t entry)
+{
+    l2_pgentry_t *pl2e = &idle_pg_table[l2_table_offset(vaddr)];
+    l1_pgentry_t *pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(vaddr);
+
+    *pl1e = entry;
+
+    /* Only one mapping changed, so a single-entry flush suffices. */
+    __flush_tlb_one(vaddr);
+}
+
+
+/* Point the fixed-address slot 'idx' at the mapping given by 'entry'. */
+void __set_fixmap(enum fixed_addresses idx, 
+                  l1_pgentry_t entry)
+{
+    if ( unlikely(idx >= __end_of_fixed_addresses) )
+    {
+        printk("Invalid __set_fixmap\n");
+        return;
+    }
+
+    set_pte_phys(fix_to_virt(idx), entry);
+}
+
+
+/*
+ * Ensure L2 entries (each backed by a fresh, zeroed L1 table) exist to
+ * cover [start, end) in 'pg_base'.  Only the page-table *structure* is
+ * created here; individual PTEs are installed later via set_fixmap().
+ * 'end' is compared with '!=' after each L2-sized step, so it must be
+ * L2-aligned (end == 0 means "to the top of the address space").
+ * NOTE(review): get_free_page() is not checked -- boot-time allocation
+ * is assumed to succeed.
+ */
+static void __init fixrange_init(unsigned long start, 
+                                 unsigned long end, 
+                                 l2_pgentry_t *pg_base)
+{
+    l2_pgentry_t *l2e;
+    int i;
+    unsigned long vaddr, page;
+
+    vaddr = start;
+    i = l2_table_offset(vaddr);
+    l2e = pg_base + i;
+
+    for ( ; (i < ENTRIES_PER_L2_PAGETABLE) && (vaddr != end); l2e++, i++ ) 
+    {
+        /*
+         * FIX: advance vaddr for *every* slot visited. Previously the
+         * increment sat below the 'continue', so an already-populated
+         * L2 entry failed to advance vaddr and broke the termination
+         * test against 'end'.
+         */
+        vaddr += 1 << L2_PAGETABLE_SHIFT;
+        if ( !l2_pgentry_empty(*l2e) )
+            continue;
+        page = (unsigned long)get_free_page();
+        clear_page(page);
+        *l2e = mk_l2_pgentry(__pa(page) | __PAGE_HYPERVISOR);
+    }
+}
+
+/*
+ * Build the idle (hypervisor) page tables: a 1:1 superpage map of low
+ * physical memory, plus the fixmap, ioremap, read-only MPT alias,
+ * domain-page mapcache and linear-pagetable regions.
+ * NOTE(review): get_free_page() results are not checked -- boot-time
+ * allocations are assumed to succeed.
+ */
+void __init paging_init(void)
+{
+    unsigned long addr;
+    void *ioremap_pt;
+    int i;
+
+    /* Idle page table 1:1 maps the first part of physical memory. */
+    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+        idle_pg_table[i] = 
+            mk_l2_pgentry((i << L2_PAGETABLE_SHIFT) | 
+                          __PAGE_HYPERVISOR | _PAGE_PSE);
+
+    /*
+     * Fixed mappings, only the page table structure has to be
+     * created - mappings will be set by set_fixmap():
+     */
+    addr = FIXADDR_START & ~((1<<L2_PAGETABLE_SHIFT)-1);
+    fixrange_init(addr, 0, idle_pg_table);
+
+    /* Create page table for ioremap(). */
+    ioremap_pt = (void *)get_free_page();
+    clear_page(ioremap_pt);
+    idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] = 
+        mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);
+
+    /* Create read-only mapping of MPT for guest-OS use. */
+    /* The RO alias shares the RDWR region's L1 tables, then is marked RO. */
+    idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT];
+    mk_l2_readonly(idle_pg_table + 
+                   (RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT));
+
+    /* Set up mapping cache for domain pages. */
+    mapcache = (unsigned long *)get_free_page();
+    clear_page(mapcache);
+    idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);
+
+    /* Set up linear page table mapping. */
+    /* (The L2 maps itself, exposing all PTEs as a flat virtual array.) */
+    idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
+
+}
+
+/* Tear down the boot-time 1:1 mapping of low physical memory. */
+void __init zap_low_mappings(void)
+{
+    int slot = 0;
+
+    while ( slot < DOMAIN_ENTRIES_PER_L2_PAGETABLE )
+        idle_pg_table[slot++] = mk_l2_pgentry(0);
+
+    /* The 1:1 map used global pages, so flush those as well. */
+    flush_tlb_all_pge();
+}
+
+
+/*
+ * Hypercall: set the guest-OS ring-1 stack (SS:ESP) used on entry from
+ * lower-privilege rings. Returns 0, or -EPERM for a ring-0 selector.
+ */
+long do_stack_switch(unsigned long ss, unsigned long esp)
+{
+    struct tss_struct *tss = &init_tss[smp_processor_id()];
+
+    /* We load and use SS on the guest's behalf, so a ring-0 (RPL==0)
+     * selector must be refused. */
+    if ( (ss & 3) == 0 )
+        return -EPERM;
+
+    current->thread.guestos_ss = ss;
+    current->thread.guestos_sp = esp;
+    tss->ss1  = ss;
+    tss->esp1 = esp;
+
+    return 0;
+}
+
+
+/* Returns TRUE if given descriptor is valid for GDT or LDT. */
+/* 'a' is the low 32 bits of the descriptor, 'b' the high 32 bits. */
+int check_descriptor(unsigned long a, unsigned long b)
+{
+    unsigned long base, limit;
+
+    /* A not-present descriptor will always fault, so is safe. */
+    if ( !(b & _SEGMENT_P) ) 
+        goto good;
+
+    /*
+     * We don't allow a DPL of zero. There is no legitimate reason for 
+     * specifying DPL==0, and it gets rather dangerous if we also accept call 
+     * gates (consider a call gate pointing at another guestos descriptor with 
+     * DPL 0 -- this would get the OS ring-0 privileges).
+     */
+    if ( (b & _SEGMENT_DPL) == 0 )
+        goto bad;
+
+    if ( !(b & _SEGMENT_S) )
+    {
+        /*
+         * System segment:
+         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
+         *  2. Don't allow TSS descriptors or task gates as we don't
+         *     virtualise x86 tasks.
+         *  3. Don't allow LDT descriptors because they're unnecessary and
+         *     I'm uneasy about allowing an LDT page to contain LDT
+         *     descriptors. In any case, Xen automatically creates the
+         *     required descriptor when reloading the LDT register.
+         *  4. We allow call gates but they must not jump to a private segment.
+         */
+
+        /* Disallow everything but call gates. */
+        if ( (b & _SEGMENT_TYPE) != 0xc00 )
+            goto bad;
+
+        /* Can't allow far jump to a Xen-private segment. */
+        /* (The gate's target selector lives in bits 16-31 of 'a'.) */
+        if ( !VALID_CODESEL(a>>16) )
+            goto bad;
+
+        /* Reserved bits must be zero. */
+        if ( (b & 0xe0) != 0 )
+            goto bad;
+        
+        /* No base/limit check is needed for a call gate. */
+        goto good;
+    }
+    
+    /* Check that base/limit do not overlap Xen-private space. */
+    /* Reassemble the base and limit from their scattered descriptor fields. */
+    base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
+    limit = (b&0xf0000) | (a&0xffff);
+    limit++; /* We add one because limit is inclusive. */
+    if ( (b & _SEGMENT_G) )
+        limit <<= 12; /* granularity bit: limit counts 4kB units */
+    if ( ((base + limit) <= base) || 
+         ((base + limit) > PAGE_OFFSET) )
+        goto bad;
+
+ good:
+    return 1;
+ bad:
+    return 0;
+}
+
+
+/*
+ * Validate and install a new GDT for domain 'p'.
+ * 'frames' holds the machine frame numbers of the GDT pages (512
+ * descriptors per page). Takes a PGT_gdt_page type reference on each
+ * new frame, copies the Xen-reserved descriptors into the first page,
+ * releases the old GDT's frames and remaps the per-domain slots.
+ * Returns 0, or -EINVAL with all new references undone.
+ */
+long set_gdt(struct domain *p, 
+             unsigned long *frames,
+             unsigned int entries)
+{
+    /* NB. There are 512 8-byte entries per GDT page. */
+    int i, nr_pages = (entries + 511) / 512;
+    unsigned long pfn;
+    struct desc_struct *vgdt;
+
+    /* Check the new GDT. */
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        if ( unlikely(frames[i] >= max_page) ||
+             unlikely(!get_page_and_type(&frame_table[frames[i]], 
+                                         p, PGT_gdt_page)) )
+            goto fail;
+    }
+
+    /* Copy reserved GDT entries to the new GDT. */
+    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
+    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY, 
+           gdt_table + FIRST_RESERVED_GDT_ENTRY, 
+           NR_RESERVED_GDT_ENTRIES*8);
+    unmap_domain_mem(vgdt);
+
+    /* Tear down the old GDT. */
+    /* 16 == maximum number of GDT pages (assumes entries <= 8192, as
+     * enforced by do_set_gdt() -- TODO confirm for other callers). */
+    for ( i = 0; i < 16; i++ )
+    {
+        if ( (pfn = l1_pgentry_to_pagenr(p->mm.perdomain_pt[i])) != 0 )
+            put_page_and_type(&frame_table[pfn]);
+        p->mm.perdomain_pt[i] = mk_l1_pgentry(0);
+    }
+
+    /* Install the new GDT. */
+    for ( i = 0; i < nr_pages; i++ )
+        p->mm.perdomain_pt[i] =
+            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+
+    SET_GDT_ADDRESS(p, GDT_VIRT_START);
+    SET_GDT_ENTRIES(p, (entries*8)-1);
+
+    return 0;
+
+ fail:
+    /* Drop the references taken so far (frames[i] itself failed). */
+    while ( i-- > 0 )
+        put_page_and_type(&frame_table[frames[i]]);
+    return -EINVAL;
+}
+
+
+/*
+ * Hypercall: install a new GDT for the current domain.
+ * 'frame_list' is a guest pointer to the machine frame numbers of the
+ * new GDT pages; 'entries' is the total descriptor count. The bounds
+ * check (entries <= 8192) also caps nr_pages at 16, the size of the
+ * on-stack 'frames' array.
+ */
+long do_set_gdt(unsigned long *frame_list, unsigned int entries)
+{
+    int nr_pages = (entries + 511) / 512;
+    unsigned long frames[16];
+    long ret;
+
+    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) ) 
+        return -EINVAL;
+    
+    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
+        return -EFAULT;
+
+    if ( (ret = set_gdt(current, frames, entries)) == 0 )
+    {
+        local_flush_tlb();
+        /*
+         * FIX: LGDT *reads* its 6-byte memory operand, so it must be an
+         * input ("m") constraint. The previous "=m" output constraint
+         * told the compiler the pseudo-descriptor was written by the
+         * asm, allowing it to treat the stores that initialise it as
+         * dead.
+         */
+        __asm__ __volatile__ ("lgdt %0" : : "m" (*current->mm.gdt));
+    }
+
+    return ret;
+}
+
+
+/*
+ * Hypercall: update one 8-byte descriptor at machine address 'pa' with
+ * the value (word1,word2), after validating it with check_descriptor()
+ * and taking the appropriate type reference on the containing frame.
+ * Returns 0 on success, -EINVAL on any failure.
+ */
+long do_update_descriptor(
+    unsigned long pa, unsigned long word1, unsigned long word2)
+{
+    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT;
+    struct pfn_info *page;
+    long ret = -EINVAL;
+
+    /* Must be 8-byte aligned, within RAM, and a safe descriptor value. */
+    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(word1, word2) )
+        return -EINVAL;
+
+    page = &frame_table[pfn];
+    /*
+     * FIX: return directly on failure. The old 'goto out' fell through
+     * to put_page() and dropped a general reference that was never
+     * acquired, underflowing the frame's reference count.
+     */
+    if ( unlikely(!get_page(page, current)) )
+        return -EINVAL;
+
+    /* Check if the given frame is in use in an unsafe context. */
+    switch ( page->type_and_flags & PGT_type_mask )
+    {
+    case PGT_gdt_page:
+        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
+        if ( (l1_pgentry_to_pagenr(current->mm.perdomain_pt[0]) == pfn) &&
+             (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
+             (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
+            goto out;
+        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
+            goto out;
+        break;
+    case PGT_ldt_page:
+        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
+            goto out;
+        break;
+    default:
+        if ( unlikely(!get_page_type(page, PGT_writeable_page)) )
+            goto out;
+        break;
+    }
+
+    /* All is good so make the update. */
+    gdt_pent = map_domain_mem(pa);
+    gdt_pent[0] = word1;
+    gdt_pent[1] = word2;
+    unmap_domain_mem(gdt_pent);
+
+    put_page_type(page);
+
+    ret = 0; /* success */
+
+ out:
+    put_page(page);
+    return ret;
+}
+
+#ifdef MEMORY_GUARD
+
+/*
+ * Replace the superpage (PSE) mappings covering the Xen heap with
+ * 4kB-granular L1 tables so that individual pages can later be
+ * unmapped by the memory guard. The L1 tables are carved off the
+ * front of the heap; returns the advanced heap-start pointer.
+ */
+void *memguard_init(void *heap_start)
+{
+    l1_pgentry_t *l1;
+    int i, j;
+
+    /* Round the allocation pointer up to a page boundary. */
+    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) & 
+                          PAGE_MASK);
+
+    /* Memory guarding is incompatible with super pages. */
+    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
+    {
+        /* Take the next heap page as the L1 table for this 4MB region. */
+        l1 = (l1_pgentry_t *)heap_start;
+        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
+        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
+            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
+                                   (j << L1_PAGETABLE_SHIFT) | 
+                                  __PAGE_HYPERVISOR);
+        /* Hook the same L1 into both the 1:1 and PAGE_OFFSET mappings. */
+        idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
+            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
+    }
+
+    return heap_start;
+}
+
+/*
+ * Set (guard) or clear (unguard) the _PAGE_PRESENT bit on every 4kB
+ * mapping in [p, p+l). The caller handles any TLB flushing required.
+ */
+static void __memguard_change_range(void *p, unsigned long l, int guard)
+{
+    unsigned long va  = (unsigned long)p;
+    unsigned long rem = (unsigned long)l;
+
+    /* Ensure we are dealing with a page-aligned whole number of pages. */
+    ASSERT((va&PAGE_MASK) != 0);
+    ASSERT((rem&PAGE_MASK) != 0);
+    ASSERT((va&~PAGE_MASK) == 0);
+    ASSERT((rem&~PAGE_MASK) == 0);
+
+    for ( ; rem != 0; va += PAGE_SIZE, rem -= PAGE_SIZE )
+    {
+        l2_pgentry_t *pl2e = &idle_pg_table[l2_table_offset(va)];
+        l1_pgentry_t *pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(va);
+        unsigned long pte  = l1_pgentry_val(*pl1e);
+
+        if ( guard )
+            pte &= ~_PAGE_PRESENT;
+        else
+            pte |= _PAGE_PRESENT;
+
+        *pl1e = mk_l1_pgentry(pte);
+    }
+}
+
+/* Unmap [p, p+l) so any access faults immediately. */
+void memguard_guard_range(void *p, unsigned long l)
+{
+    __memguard_change_range(p, l, 1);
+    /* Flush so no stale (present) translations for the range survive. */
+    local_flush_tlb();
+}
+
+/*
+ * Restore mappings for [p, p+l). No TLB flush is performed here --
+ * presumably safe because the TLB does not cache not-present entries,
+ * so the restored mappings take effect on the next access.
+ */
+void memguard_unguard_range(void *p, unsigned long l)
+{
+    __memguard_change_range(p, l, 0);
+}
+
+/* Returns nonzero iff the page containing 'p' is currently guarded. */
+int memguard_is_guarded(void *p)
+{
+    l1_pgentry_t *l1;
+    l2_pgentry_t *l2;
+    unsigned long _p = (unsigned long)p;
+    l2  = &idle_pg_table[l2_table_offset(_p)];
+    l1  = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
+    return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
+}
+
+#endif
diff --git a/xen/arch/x86/x86_32/usercopy.c b/xen/arch/x86/x86_32/usercopy.c
new file mode 100644 (file)
index 0000000..dc2d34c
--- /dev/null
@@ -0,0 +1,190 @@
+/* 
+ * User address space access functions.
+ * The non inlined parts of asm-i386/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <xen/config.h>
+#include <asm/uaccess.h>
+//#include <asm/mmx.h>
+
+#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
+
+/*
+ * Copy 'n' bytes to user space, using MMX streaming for large copies.
+ * Returns the number of bytes NOT copied (0 on success).
+ * NOTE(review): this relies on __copy_user()/mmx_copy_user() updating
+ * 'n' in place -- verify against the uaccess.h macro definitions.
+ */
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+       {
+               if(n<512)
+                       __copy_user(to,from,n);
+               else
+                       mmx_copy_user(to,from,n);
+       }
+       return n;
+}
+
+/*
+ * Copy 'n' bytes from user space; the destination tail is zeroed on a
+ * fault, and the whole buffer is zeroed if the range is inaccessible.
+ */
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+       {
+               if(n<512)
+                       __copy_user_zeroing(to,from,n);
+               else
+                       mmx_copy_user_zeroing(to, from, n);
+       }
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+#else
+
+/* Plain variant: copy 'n' bytes to user space; returns bytes NOT copied. */
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       if (access_ok(VERIFY_WRITE, to, n))
+               __copy_user(to,from,n);
+       return n;
+}
+
+/* Plain variant: copy from user space, zero-filling on fault/bad range. */
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       prefetchw(to);
+       if (access_ok(VERIFY_READ, from, n))
+               __copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+#endif
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+/*
+ * Copy at most 'count' bytes of a NUL-terminated string from user
+ * space into 'dst'. Sets 'res' to the length copied (excluding the
+ * NUL), or to -EFAULT via the .fixup path if the source faults. If
+ * the source string is 'count' bytes or longer, 'dst' is NOT
+ * NUL-terminated and 'res' == 'count'.
+ */
+#define __do_strncpy_from_user(dst,src,count,res)                         \
+do {                                                                      \
+       int __d0, __d1, __d2;                                              \
+       __asm__ __volatile__(                                              \
+               "       testl %1,%1\n"                                     \
+               "       jz 2f\n"                                           \
+               "0:     lodsb\n"                                           \
+               "       stosb\n"                                           \
+               "       testb %%al,%%al\n"                                 \
+               "       jz 1f\n"                                           \
+               "       decl %1\n"                                         \
+               "       jnz 0b\n"                                          \
+               "1:     subl %1,%0\n"                                      \
+               "2:\n"                                                     \
+               ".section .fixup,\"ax\"\n"                                 \
+               "3:     movl %5,%0\n"                                      \
+               "       jmp 2b\n"                                          \
+               ".previous\n"                                              \
+               ".section __ex_table,\"a\"\n"                              \
+               "       .align 4\n"                                        \
+               "       .long 0b,3b\n"                                     \
+               ".previous"                                                \
+               : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),      \
+                 "=&D" (__d2)                                             \
+               : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+               : "memory");                                               \
+} while (0)
+
+/* As strncpy_from_user() but without the access_ok() pre-check. */
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res;
+       __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+/*
+ * Copy a user string into 'dst' (at most 'count' bytes). Returns the
+ * copied length excluding the NUL, or -EFAULT on an inaccessible or
+ * faulting source address.
+ */
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res = -EFAULT;
+       if (access_ok(VERIFY_READ, src, 1))
+               __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+/*
+ * Zero 'size' user-space bytes at 'addr' with rep stos; on a fault the
+ * .fixup path recomputes 'size' as the number of bytes left unzeroed.
+ * The macro updates 'size' in place (output constraint on %ecx).
+ */
+#define __do_clear_user(addr,size)                                     \
+do {                                                                   \
+       int __d0;                                                       \
+       __asm__ __volatile__(                                           \
+               "0:     rep; stosl\n"                                   \
+               "       movl %2,%0\n"                                   \
+               "1:     rep; stosb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%2,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .long 0b,3b\n"                                  \
+               "       .long 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0)                             \
+               : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));     \
+} while (0)
+
+/* Zero 'n' user bytes at 'to'; returns the number NOT cleared. */
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               __do_clear_user(to, n);
+       return n;
+}
+
+/* As clear_user() but without the access_ok() pre-check. */
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+       __do_clear_user(to, n);
+       return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+       /* mask is ~0UL when 's' is a valid user address, else 0; it both
+        * bounds the scan count and zeroes the result for bad pointers. */
+       unsigned long mask = -__addr_ok(s);
+       unsigned long res, tmp;
+
+       __asm__ __volatile__(
+               "       testl %0, %0\n"
+               "       jz 3f\n"
+               "       andl %0,%%ecx\n"
+               "0:     repne; scasb\n"
+               "       setne %%al\n"
+               "       subl %%ecx,%0\n"
+               "       addl %0,%%eax\n"
+               "1:\n"
+               ".section .fixup,\"ax\"\n"
+               "2:     xorl %%eax,%%eax\n"     /* fault: report length 0 */
+               "       jmp 1b\n"
+               "3:     movb $1,%%al\n"         /* n == 0: report length 1 */
+               "       jmp 1b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 0b,2b\n"
+               ".previous"
+               :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
+               :"0" (n), "1" (s), "2" (0), "3" (mask)
+               :"cc");
+       return res & mask;
+}
diff --git a/xen/arch/x86/x86_32/xen.lds b/xen/arch/x86/x86_32/xen.lds
new file mode 100644 (file)
index 0000000..5947eba
--- /dev/null
@@ -0,0 +1,87 @@
+/* ld script to make i386 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(start)
+SECTIONS
+{
+  /* Link address: Xen's x86_32 virtual base plus a 1MB offset
+   * (presumably matching the physical load address -- confirm against
+   * the boot code). */
+  . = 0xFC400000 + 0x100000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } = 0x9090              /* pad gaps with NOPs */
+  .text.lock : { *(.text.lock) }       /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;       /* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  __start___kallsyms = .;      /* All kernel symbols */
+  __kallsyms : { *(__kallsyms) }
+  __stop___kallsyms = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       }
+
+  _edata = .;                  /* End of data section */
+
+  . = ALIGN(8192);             /* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss)
+       }
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.text.exit)
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/xen/arch/x86/x86_64/usercopy.c b/xen/arch/x86/x86_64/usercopy.c
new file mode 100644 (file)
index 0000000..e7c11fa
--- /dev/null
@@ -0,0 +1,136 @@
+/* 
+ * User address space access functions.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ * Copyright 2002 Andi Kleen <ak@suse.de>
+ */
+#include <asm/uaccess.h>
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+/*
+ * 64-bit variant: copy at most 'count' bytes of a NUL-terminated user
+ * string into 'dst'. Sets 'res' to the copied length excluding the
+ * NUL, or to -EFAULT via the .fixup path on a faulting source. If the
+ * string is 'count' bytes or longer, 'dst' is NOT NUL-terminated.
+ */
+#define __do_strncpy_from_user(dst,src,count,res)                         \
+do {                                                                      \
+       long __d0, __d1, __d2;                                             \
+       __asm__ __volatile__(                                              \
+               "       testq %1,%1\n"                                     \
+               "       jz 2f\n"                                           \
+               "0:     lodsb\n"                                           \
+               "       stosb\n"                                           \
+               "       testb %%al,%%al\n"                                 \
+               "       jz 1f\n"                                           \
+               "       decq %1\n"                                         \
+               "       jnz 0b\n"                                          \
+               "1:     subq %1,%0\n"                                      \
+               "2:\n"                                                     \
+               ".section .fixup,\"ax\"\n"                                 \
+               "3:     movq %5,%0\n"                                      \
+               "       jmp 2b\n"                                          \
+               ".previous\n"                                              \
+               ".section __ex_table,\"a\"\n"                              \
+               "       .align 8\n"                                        \
+               "       .quad 0b,3b\n"                                     \
+               ".previous"                                                \
+               : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),      \
+                 "=&D" (__d2)                                             \
+               : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+               : "memory");                                               \
+} while (0)
+
+/* As strncpy_from_user() but without the access_ok() pre-check. */
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res;
+       __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+/*
+ * Copy a user string into 'dst' (at most 'count' bytes). Returns the
+ * copied length excluding the NUL, or -EFAULT on an inaccessible or
+ * faulting source address.
+ */
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res = -EFAULT;
+       if (access_ok(VERIFY_READ, src, 1))
+               __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+/*
+ * Zero Userspace
+ */
+
+/*
+ * Zero 'size' bytes at user address 'addr' (no access_ok() check):
+ * quadword stores first, then a byte loop for the tail. Returns the
+ * number of bytes NOT cleared (recomputed by the .fixup path on a
+ * fault). NOTE(review): gcc ignores whitespace inside constraint
+ * strings, so the "[dst] " spelling below is harmless.
+ */
+unsigned long __clear_user(void *addr, unsigned long size)
+{
+       long __d0;
+       /* no memory constraint because it doesn't change any memory gcc knows
+          about */
+       asm volatile(
+               "       testq  %[size8],%[size8]\n"
+               "       jz     4f\n"
+               "0:     movq %[zero],(%[dst])\n"
+               "       addq   %[eight],%[dst]\n"
+               "       decl %%ecx ; jnz   0b\n"
+               "4:     movq  %[size1],%%rcx\n"
+               "       testl %%ecx,%%ecx\n"
+               "       jz     2f\n"
+               "1:     movb   %b[zero],(%[dst])\n"
+               "       incq   %[dst]\n"
+               "       decl %%ecx ; jnz  1b\n"
+               "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     lea 0(%[size1],%[size8],8),%[size8]\n"
+               "       jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 8\n"
+               "       .quad 0b,3b\n"
+               "       .quad 1b,2b\n"
+               ".previous"
+               : [size8] "=c"(size), [dst] "=&D" (__d0)
+               : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst] "(addr),
+                 [zero] "r" (0UL), [eight] "r" (8UL));
+       return size;
+}
+
+
+/* Zero 'n' user bytes at 'to'; returns the number of bytes NOT cleared. */
+unsigned long clear_user(void *to, unsigned long n)
+{
+       if (!access_ok(VERIFY_WRITE, to, n))
+               return n;
+       return __clear_user(to, n);
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+       unsigned long res = 0;
+       char c;
+
+       if (!access_ok(VERIFY_READ, s, n))
+               return 0;
+
+       /* Byte-at-a-time scan via get_user(); a fault anywhere yields 0.
+        * NOTE(review): the res>n test fires only after reading byte n+1,
+        * so up to n+2 bytes may be read -- confirm this is acceptable
+        * given the access_ok() range check covers only n bytes. */
+       while (1) {
+               if (get_user(c, s))
+                       return 0;
+               if (!c)
+                       return res+1;
+               if (res>n)
+                       return n+1;
+               res++;
+               s++;
+       }
+}
+
+/*
+ * Copy 'len' bytes from one user buffer to another. Returns the number
+ * of bytes that could not be copied (len if either range is invalid).
+ */
+unsigned long copy_in_user(void *to, const void *from, unsigned len)
+{
+       if (!access_ok(VERIFY_WRITE, to, len))
+               return len;
+       if (!access_ok(VERIFY_READ, from, len))
+               return len;
+       return copy_user_generic(to, from, len);
+}
diff --git a/xen/arch/x86/x86_64/xen.lds b/xen/arch/x86/x86_64/xen.lds
new file mode 100644 (file)
index 0000000..caea240
--- /dev/null
@@ -0,0 +1,85 @@
+/* Excerpts written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(start)
+SECTIONS
+{
+  . = 0xFFFF840000100000; /* link VA of _text -- NOTE(review): presumably hypervisor virtual base + 1MiB load offset; confirm against asm-x86 config.h */
+  _text = .;			/* Text and read-only data */
+  .text : {
+	*(.text)
+	*(.fixup)
+	*(.gnu.warning)
+	} = 0x9090 /* fill inter-section gaps with NOP bytes */
+  .text.lock : { *(.text.lock) }	/* out-of-line lock text */
+
+  _etext = .;			/* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);		/* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;	/* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  __start___kallsyms = .;	/* All kernel symbols */
+  __kallsyms : { *(__kallsyms) }
+  __stop___kallsyms = .;
+
+  .data : {			/* Data */
+	*(.data)
+	CONSTRUCTORS
+	}
+
+  _edata = .;			/* End of data section */
+
+  . = ALIGN(8192);		/* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);		/* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : { *(.initcall.init) }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096); /* page-align so .data.idt can be mapped on its own page */
+  .data.page_aligned : { *(.data.idt) }
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  __bss_start = .;		/* BSS */
+  .bss : {
+	*(.bss)
+	}
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+	*(.text.exit)
+	*(.data.exit)
+	*(.exitcall.exit)
+	}
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
diff --git a/xen/arch/x86/xen.lds b/xen/arch/x86/xen.lds
deleted file mode 100644 (file)
index 5947eba..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(start)
-SECTIONS
-{
-  . = 0xFC400000 + 0x100000;
-  _text = .;                   /* Text and read-only data */
-  .text : {
-       *(.text)
-       *(.fixup)
-       *(.gnu.warning)
-       } = 0x9090
-  .text.lock : { *(.text.lock) }       /* out-of-line lock text */
-
-  _etext = .;                  /* End of text section */
-
-  .rodata : { *(.rodata) *(.rodata.*) }
-  .kstrtab : { *(.kstrtab) }
-
-  . = ALIGN(16);               /* Exception table */
-  __start___ex_table = .;
-  __ex_table : { *(__ex_table) }
-  __stop___ex_table = .;
-
-  __start___ksymtab = .;       /* Kernel symbol table */
-  __ksymtab : { *(__ksymtab) }
-  __stop___ksymtab = .;
-
-  __start___kallsyms = .;      /* All kernel symbols */
-  __kallsyms : { *(__kallsyms) }
-  __stop___kallsyms = .;
-
-  .data : {                    /* Data */
-       *(.data)
-       CONSTRUCTORS
-       }
-
-  _edata = .;                  /* End of data section */
-
-  . = ALIGN(8192);             /* init_task */
-  .data.init_task : { *(.data.init_task) }
-
-  . = ALIGN(4096);             /* Init code and data */
-  __init_begin = .;
-  .text.init : { *(.text.init) }
-  .data.init : { *(.data.init) }
-  . = ALIGN(16);
-  __setup_start = .;
-  .setup.init : { *(.setup.init) }
-  __setup_end = .;
-  __initcall_start = .;
-  .initcall.init : { *(.initcall.init) }
-  __initcall_end = .;
-  . = ALIGN(4096);
-  __init_end = .;
-
-  . = ALIGN(4096);
-  .data.page_aligned : { *(.data.idt) }
-
-  . = ALIGN(32);
-  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
-
-  __bss_start = .;             /* BSS */
-  .bss : {
-       *(.bss)
-       }
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.text.exit)
-       *(.data.exit)
-       *(.exitcall.exit)
-       }
-
-  /* Stabs debugging sections.  */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment 0 : { *(.comment) }
-}
index f61f6aeeddf509d54333bae8831b81317e72f4b4..f8cd4504518dcb45c1fe3f3ac5c83df7753db4db 100644 (file)
 #define CONFIG_ACPI_BOOT 1
 
 #define CONFIG_PCI 1
-#define CONFIG_PCI_BIOS 1
 #define CONFIG_PCI_DIRECT 1
+#if defined(__i386__)
+#define CONFIG_PCI_BIOS 1 /* PCI BIOS32 services are callable only from 32-bit code */
+#endif
 
 #define CONFIG_IDE 1
 #define CONFIG_BLK_DEV_IDE 1
index 7c5212e50f52e0fb43f03cf6ef3b01ad2b64cabb..1e35752a8434c3250baf109a60a0989e5eb38890 100644 (file)
@@ -86,6 +86,8 @@ extern char _stext, _etext;
 #define __STR(x) #x
 #define STR(x) __STR(x)
 
+#if defined(__i386__)
+
 #define SAVE_ALL \
        "cld\n\t" \
        "pushl %gs\n\t" \
@@ -105,8 +107,11 @@ extern char _stext, _etext;
        "movl %edx,%fs\n\t" \
        "movl %edx,%gs\n\t"
 
-#define IRQ_NAME2(nr) nr##_interrupt(void)
-#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+#else
+
+#define SAVE_ALL /* x86-64: empty placeholder -- NOTE(review): presumably register save is (or will be) done by the 64-bit entry code; confirm */
+
+#endif
 
 #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
 #define XBUILD_SMP_INTERRUPT(x,v)\
@@ -115,7 +120,7 @@ asmlinkage void call_##x(void); \
 __asm__( \
 "\n"__ALIGN_STR"\n" \
 SYMBOL_NAME_STR(x) ":\n\t" \
-       "pushl $"#v"\n\t" \
+       "push"__OS" $"#v"\n\t" \
        SAVE_ALL \
        SYMBOL_NAME_STR(call_##x)":\n\t" \
        "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
@@ -128,13 +133,13 @@ asmlinkage void call_##x(void); \
 __asm__( \
 "\n"__ALIGN_STR"\n" \
 SYMBOL_NAME_STR(x) ":\n\t" \
-       "pushl $"#v"\n\t" \
+       "push"__OS" $"#v"\n\t" \
        SAVE_ALL \
-       "movl %esp,%eax\n\t" \
-       "pushl %eax\n\t" \
+       "mov %"__OP"sp,%"__OP"ax\n\t" \
+       "push %"__OP"ax\n\t" \
        SYMBOL_NAME_STR(call_##x)":\n\t" \
        "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \
-       "addl $4,%esp\n\t" \
+       "add $4,%"__OP"sp\n\t" \
        "jmp ret_from_intr\n");
 
 #define BUILD_COMMON_IRQ() \
@@ -147,12 +152,15 @@ __asm__( \
        "call " SYMBOL_NAME_STR(do_IRQ) "\n\t" \
        "jmp ret_from_intr\n");
 
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
 #define BUILD_IRQ(nr) \
 asmlinkage void IRQ_NAME(nr); \
 __asm__( \
 "\n"__ALIGN_STR"\n" \
 SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
-       "pushl $"#nr"\n\t" \
+       "push"__OS" $"#nr"\n\t" \
        "jmp common_interrupt");
 
 extern unsigned long prof_cpu_mask;